diff --git a/docs/compatibility.md b/docs/compatibility.md
new file mode 100644
index 00000000..eec5ebd1
--- /dev/null
+++ b/docs/compatibility.md
@@ -0,0 +1,23 @@
+# Compatibility
+
+## v0.3.1
+
+### InternData-N1 update to v0.5
+
+The InternData-N1 VLN-PE trajectory training dataset has been upgraded from `v0.1` to `v0.5`. The new version introduces minor structural changes to the dataset layout, and the LeRobot-to-LMDB conversion logic has been updated to match the `v0.5` structure.
+
+The training pipeline now reads instructions under a renamed key:
+- `instruction_text` → `task`
+
+The updated conversion logic is **not compatible** with InternData-N1 `v0.1`.
+
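+For example, code that reads the instruction from an episode entry of `tasks.jsonl` changes as follows (a minimal sketch; `episode` stands for one parsed JSON line):
+
+```python
+# v0.1 layout (no longer supported)
+# instruction = episode['instruction_text']
+
+# v0.5 layout
+instruction = episode['task']
+```
+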
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", + "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf", + "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc", + ] + try: + for path in font_paths: + if os.path.exists(path): + return ImageFont.truetype(path, font_size) + return ImageFont.load_default() + except: + return ImageFont.load_default() + + +def wrap_text(text, font, max_width): + """Wrap text to fit specified width""" + lines = [] + words = text.split(' ') + current_line = "" + + for word in words: + test_line = current_line + " " + word if current_line else word + bbox = font.getbbox(test_line) + text_width = bbox[2] - bbox[0] + + if text_width <= max_width: + current_line = test_line + else: + if current_line: + lines.append(current_line) + current_line = word + + if current_line: + lines.append(current_line) + + return lines + + +def calculate_text_height(text, font, max_width, line_spacing=5): + """Calculate total height of text after wrapping""" + wrapped_lines = wrap_text(text, font, max_width) + bbox = font.getbbox("Ag") + line_height = (bbox[3] - bbox[1]) + line_spacing + return len(wrapped_lines) * line_height, wrapped_lines + + +def find_optimal_font_size(text, panel_width, panel_height, margin=10, title_height=35, min_font_size=10, max_font_size=24): + """Find the maximum font size that allows text to be fully displayed""" + available_width = panel_width - 2 * margin + available_height = panel_height - title_height - 2 * margin + + for font_size in range(max_font_size, min_font_size - 1, -1): + font = get_font(font_size) + text_height, wrapped_lines = calculate_text_height(text, font, available_width, line_spacing=5) + + if text_height <= available_height: + return font_size, font, wrapped_lines + + font = get_font(min_font_size) + _, wrapped_lines = calculate_text_height(text, font, available_width, line_spacing=5) + return min_font_size, font, wrapped_lines + + +def create_text_panel(text, panel_width, panel_height, frame_info="", bg_color=(30, 30, 30), text_color=(255, 255, 255)): + """Create text panel with automatic font size adjustment""" + panel = Image.new('RGB', (panel_width, panel_height), bg_color) + draw = ImageDraw.Draw(panel) + + margin = 10 + title_height = 35 + + # Combine frame info with instruction + full_text = f"{frame_info}\n\n{text}" if frame_info else text + + font_size, font, wrapped_lines = find_optimal_font_size( + full_text, panel_width, panel_height, + margin=margin, title_height=title_height, + min_font_size=10, max_font_size=22 + ) + + title_font = get_font(min(font_size + 2, 24)) + + # Draw title + title = "Instruction:" + draw.text((margin, margin), title, font=title_font, fill=(100, 200, 255)) + + # Draw wrapped text + bbox = font.getbbox("Ag") + line_height = (bbox[3] - bbox[1]) + 5 + y_position = title_height + + for line in wrapped_lines: + draw.text((margin, y_position), line, font=font, fill=text_color) + y_position += line_height + + return np.array(panel) + + +class LerobotAsLmdb: + def __init__(self, dataset_path): + self.dataset_path = dataset_path + + def get_all_keys(self, allow_scan_list=['r2r']): + keys = [] + for scan in os.listdir(self.dataset_path): + scan_path = os.path.join(self.dataset_path, scan) + if not os.path.isdir(scan_path): + continue + if scan not in allow_scan_list: + continue + for scene_index in os.listdir(scan_path): + scene_path = os.path.join(scan_path, scene_index) + if not os.path.isdir(scene_path): + continue + + data_dir = 
os.path.join(scene_path, "data") + if os.path.exists(data_dir): + for chunk_dir in os.listdir(data_dir): + if chunk_dir.startswith("chunk-"): + chunk_path = os.path.join(data_dir, chunk_dir) + chunk_idx = int(chunk_dir.split("-")[1]) + + for file in os.listdir(chunk_path): + if file.startswith("episode_") and file.endswith(".parquet"): + episode_idx = int(file.split("_")[1].split(".")[0]) + keys.append(f"{scan}_{scene_index}_{chunk_idx:03d}_{episode_idx:06d}") + else: + for trajectory in os.listdir(scene_path): + trajectory_path = os.path.join(scene_path, trajectory) + if not os.path.isdir(trajectory_path): + continue + keys.append(f"{scan}_{scene_index}_000_{trajectory:06d}") + return keys + + def get_data_by_key(self, key): + # key: {scan}_{scene_index}_{chunk_index}_{episode_index} + parts = key.split('_') + if len(parts) < 3: + raise ValueError(f"Invalid key format: {key}") + + if parts[1] == 'flash': + scan = 'r2r_flash' + parts.pop(1) + elif parts[1] == 'aliengo': + scan = 'r2r_aliengo' + parts.pop(1) + else: + scan = parts[0] + + scene_index = parts[1] + chunk_idx = int(parts[-2]) + episode_idx = int(parts[-1]) + + base_path = os.path.join(self.dataset_path, scan, scene_index) + + chunk_str = f"chunk-{chunk_idx:03d}" + parquet_path = os.path.join(base_path, "data", chunk_str, f"episode_{episode_idx:06d}.parquet") + if not os.path.exists(parquet_path): + raise FileNotFoundError(f"Parquet file not found: {parquet_path}") + + df = pd.read_parquet(parquet_path) + + stats_path = os.path.join(base_path, "meta", "episodes_stats.jsonl") + task_min = 0 + task_max = 0 + + if os.path.exists(stats_path): + with open(stats_path, 'r') as f: + for line in f: + try: + stats_data = json.loads(line.strip()) + if stats_data.get("episode_index") == episode_idx: + task_info = stats_data.get("task_index", {}) + task_min = task_info.get("min", 0) + task_max = task_info.get("max", 0) + break + except json.JSONDecodeError as e: + print(f"Error decoding stats JSON: {e}") + + tasks_path = os.path.join(base_path, "meta", "tasks.jsonl") + episodes_in_json = [] + finish_status_in_json = None + fail_reason_in_json = None + + with open(tasks_path, 'r') as f: + for line in f: + try: + json_data = json.loads(line.strip()) + task_index = json_data.get("task_index") + + if task_index is not None and task_min <= task_index <= task_max: + episodes_in_json.append(json_data) + + finish_status_in_json = json_data.get('finish_status') + fail_reason_in_json = json_data.get('fail_reason') + except json.JSONDecodeError as e: + print(f"Error decoding tasks JSON: {e}") + + rgb_path = os.path.join(base_path, "videos", chunk_str, "observation.images.rgb", f"episode_{episode_idx:06d}.npy") + depth_path = os.path.join(base_path, "videos", chunk_str, "observation.images.depth", f"episode_{episode_idx:06d}.npy") + + data = {} + data['episode_data'] = {} + data['episode_data']['camera_info'] = {} + data['episode_data']['camera_info']['pano_camera_0'] = {} + + data['episode_data']['camera_info']['pano_camera_0']['position'] = np.array(df['observation.camera_position'].tolist()) + data['episode_data']['camera_info']['pano_camera_0']['orientation'] = np.array(df['observation.camera_orientation'].tolist()) + data['episode_data']['camera_info']['pano_camera_0']['yaw'] = np.array(df['observation.camera_yaw'].tolist()) + + data['episode_data']['robot_info'] = {} + data['episode_data']['robot_info']['position'] = np.array(df['observation.robot_position'].tolist()) + data['episode_data']['robot_info']['orientation'] = 
+        stats_path = os.path.join(base_path, "meta", "episodes_stats.jsonl")
+        task_min = 0
+        task_max = 0
+
+        if os.path.exists(stats_path):
+            with open(stats_path, 'r') as f:
+                for line in f:
+                    try:
+                        stats_data = json.loads(line.strip())
+                        if stats_data.get("episode_index") == episode_idx:
+                            task_info = stats_data.get("task_index", {})
+                            task_min = task_info.get("min", 0)
+                            task_max = task_info.get("max", 0)
+                            break
+                    except json.JSONDecodeError as e:
+                        print(f"Error decoding stats JSON: {e}")
+
+        tasks_path = os.path.join(base_path, "meta", "tasks.jsonl")
+        episodes_in_json = []
+        finish_status_in_json = None
+        fail_reason_in_json = None
+
+        with open(tasks_path, 'r') as f:
+            for line in f:
+                try:
+                    json_data = json.loads(line.strip())
+                    task_index = json_data.get("task_index")
+
+                    if task_index is not None and task_min <= task_index <= task_max:
+                        episodes_in_json.append(json_data)
+
+                        finish_status_in_json = json_data.get('finish_status')
+                        fail_reason_in_json = json_data.get('fail_reason')
+                except json.JSONDecodeError as e:
+                    print(f"Error decoding tasks JSON: {e}")
+
+        rgb_path = os.path.join(base_path, "videos", chunk_str, "observation.images.rgb", f"episode_{episode_idx:06d}.npy")
+        depth_path = os.path.join(base_path, "videos", chunk_str, "observation.images.depth", f"episode_{episode_idx:06d}.npy")
+
+        data = {}
+        data['episode_data'] = {}
+        data['episode_data']['camera_info'] = {}
+        data['episode_data']['camera_info']['pano_camera_0'] = {}
+
+        data['episode_data']['camera_info']['pano_camera_0']['position'] = np.array(df['observation.camera_position'].tolist())
+        data['episode_data']['camera_info']['pano_camera_0']['orientation'] = np.array(df['observation.camera_orientation'].tolist())
+        data['episode_data']['camera_info']['pano_camera_0']['yaw'] = np.array(df['observation.camera_yaw'].tolist())
+
+        data['episode_data']['robot_info'] = {}
+        data['episode_data']['robot_info']['position'] = np.array(df['observation.robot_position'].tolist())
+        data['episode_data']['robot_info']['orientation'] = np.array(df['observation.robot_orientation'].tolist())
+        data['episode_data']['robot_info']['yaw'] = np.array(df['observation.robot_yaw'].tolist())
+
+        data['episode_data']['progress'] = np.array(df['observation.progress'].tolist())
+        data['episode_data']['step'] = np.array(df['observation.step'].tolist())
+        data['episode_data']['action'] = df['observation.action'].tolist()
+
+        data["finish_status"] = finish_status_in_json
+        data["fail_reason"] = fail_reason_in_json
+        data["episodes_in_json"] = episodes_in_json
+
+        data['episode_data']['camera_info']['pano_camera_0']['rgb'] = np.load(rgb_path)
+        data['episode_data']['camera_info']['pano_camera_0']['depth'] = np.load(depth_path)
+
+        return data
+
+    def save_video(self, key, output_path=None, fps=10, panel_width=400, show_instruction=True):
+        """
+        Save RGB data as a video file with an instruction panel on the right.
+
+        Args:
+            key: The episode key
+            output_path: Output video file path (default: {key}.mp4 in current directory)
+            fps: Frames per second (default: 10)
+            panel_width: Width of the instruction panel (default: 400)
+            show_instruction: Whether to show the instruction panel (default: True)
+
+        Returns:
+            str: Path to the saved video file
+        """
+        # Get data by key
+        data = self.get_data_by_key(key)
+
+        # Extract RGB data
+        rgb_data = data['episode_data']['camera_info']['pano_camera_0']['rgb']
+
+        # Extract instruction
+        instruction = ""
+        try:
+            if data.get('episodes_in_json') and len(data['episodes_in_json']) > 0:
+                # Only show the first task's instruction
+                first_episode = data['episodes_in_json'][0]
+                if 'task' in first_episode:
+                    instruction = first_episode['task']
+                else:
+                    instruction = "No instruction"
+        except (KeyError, IndexError) as e:
+            print(f"Warning: Could not extract instruction: {e}")
+            instruction = "No instruction"
+
+        # Set default output path if not provided
+        if output_path is None:
+            output_path = f"{key}.mp4"
+
+        # Ensure the output directory exists
+        output_dir = os.path.dirname(output_path)
+        if output_dir and not os.path.exists(output_dir):
+            os.makedirs(output_dir, exist_ok=True)
+
+        # Get video dimensions
+        num_frames, height, width, channels = rgb_data.shape
+
+        # Calculate output video dimensions
+        if show_instruction:
+            output_width = width + panel_width
+        else:
+            output_width = width
+
+        # Initialize video writer
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        video_writer = cv2.VideoWriter(output_path, fourcc, fps, (output_width, height))
+
+        print(f"Processing video for key: {key}")
+        print(f" Frames: {num_frames}, Frame size: {width}x{height}")
+        print(f" Output size: {output_width}x{height}, FPS: {fps}")
+        if show_instruction:
+            print(f" Instruction: {instruction[:100]}..." if len(instruction) > 100 else f" Instruction: {instruction}")
+
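+        # Frames are assumed to be HxWx3 RGB, stored either as float in [0, 1]
+        # or as uint8; OpenCV expects BGR uint8, so each frame is converted.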
+        # Write frames
+        for i in range(num_frames):
+            frame = rgb_data[i]
+
+            # Ensure data is in uint8 format
+            if frame.dtype != np.uint8:
+                if frame.max() <= 1.0:
+                    frame = (frame * 255).astype(np.uint8)
+                else:
+                    frame = frame.astype(np.uint8)
+
+            # Convert RGB to BGR for OpenCV
+            frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+
+            # Add instruction panel if requested
+            if show_instruction:
+                # Create frame info text
+                frame_text = f"Frame: {i+1}/{num_frames}"
+
+                # Create text panel
+                text_panel = create_text_panel(
+                    instruction,
+                    panel_width,
+                    height,
+                    frame_info=frame_text
+                )
+                text_panel_bgr = cv2.cvtColor(text_panel, cv2.COLOR_RGB2BGR)
+
+                # Combine frame and text panel horizontally
+                combined_frame = np.hstack([frame_bgr, text_panel_bgr])
+            else:
+                combined_frame = frame_bgr
+
+            video_writer.write(combined_frame)
+
+            # Progress indicator
+            if (i + 1) % 50 == 0 or i == num_frames - 1:
+                print(f" Progress: {i + 1}/{num_frames} frames ({100*(i+1)/num_frames:.1f}%)")
+
+        # Release video writer
+        video_writer.release()
+
+        print(f"✓ Video saved to: {output_path}")
+
+        return output_path
+
+
+if __name__ == '__main__':
+    ds = LerobotAsLmdb('/shared/smartbot/vln-pe-0.5')
+    allow_scan_name = 'r2r_flash'
+    keys = ds.get_all_keys(allow_scan_list=[allow_scan_name])
+    print(f"total keys: {len(keys)}")
+    for k in keys[:-5]:
+        try:
+            o = ds.get_data_by_key(k)
+            # ds.save_video(k, output_path=f"videos/{allow_scan_name}/{k}.mp4")
+            print(f"Key: {k}")
+            print(f" Finish status: {o.get('finish_status')}")
+            print(f" Tasks in JSON: {len(o.get('episodes_in_json', []))}")
+            print(f" RGB data: {'loaded' if o['episode_data']['camera_info']['pano_camera_0'].get('rgb') is not None else 'not found'}")
+            print(f" Depth data: {'loaded' if o['episode_data']['camera_info']['pano_camera_0'].get('depth') is not None else 'not found'}")
+        except Exception as e:
+            print(f"Error processing key {k}: {e}")
diff --git a/scripts/train/base_train/start_train.sh b/scripts/train/base_train/start_train.sh
index 4c37bbdc..f5531157 100755
--- a/scripts/train/base_train/start_train.sh
+++ b/scripts/train/base_train/start_train.sh
@@ -75,12 +75,12 @@ if [[ "$MODEL" == "navdp" ]]; then
         --node_rank=0 \
         --master_addr=localhost \
         --master_port=12345 \
-        scripts/base_train/train.py \
+        scripts/train/base_train/train.py \
         --name "$NAME" \
         --model-name "$MODEL"
 else
     echo "Using python to start $MODEL training, using $NUM_GPUS GPUs (CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES)"
-    python scripts/base_train/train.py \
+    python scripts/train/base_train/train.py \
         --name "$NAME" \
         --model-name "$MODEL"
 fi