diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0a8fd42..8675e62 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,7 +52,7 @@ jobs:
         run: python -m ruff check .
 
       - name: Mypy (type check)
-        run: mypy || true
+        run: mypy autorepro tests || true
 
       - name: Export PYTHON_BIN
         run: echo "PYTHON_BIN=$(which python)" >> $GITHUB_ENV
diff --git a/README.md b/README.md
index 2e7364e..bff9e87 100644
--- a/README.md
+++ b/README.md
@@ -291,6 +291,69 @@ $ autorepro scan --json --show 3
 - **Python**: `pyproject.toml`, `setup.py`, `requirements.txt`, `*.py`
 - **Rust**: `Cargo.toml`, `Cargo.lock`, `*.rs`
 
+### Exec Command (Multi-execution Support)
+
+Executes commands from a reproduction plan with support for single or multiple command execution, early stopping, and structured logging.
+
+```bash
+# Execute single command (default behavior)
+$ autorepro exec --desc "pytest failing"
+# Executes the top-ranked command
+
+# Execute all candidate commands in order
+$ autorepro exec --desc "pytest failing" --all
+# Runs all suggested commands sequentially
+
+# Execute specific commands by index
+$ autorepro exec --desc "test issues" --indexes "0,2-4"
+# Runs commands at indices 0, 2, 3, and 4
+
+# Stop at first successful command
+$ autorepro exec --desc "build problems" --all --until-success
+# Runs candidates in order and stops after the first command with exit code 0
+
+# Multi-execution with JSONL logging
+$ autorepro exec --desc "CI tests" --all --jsonl runs.jsonl --summary summary.json
+
+# Dry-run to preview selected commands
+$ autorepro exec --desc "npm test" --indexes "1,3" --dry-run
+[1] npm test --verbose
+[3] npm run test:unit
+```
+
+**Multi-execution Features:**
+
+- **`--all`**: Execute all candidate commands in their original order
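+- **`--indexes "N,M-P"`**: Execute specific commands by zero-based indices/ranges (comma-separated); duplicates are removed and commands run in ascending index order
+- **`--until-success`**: Stop after the first command that exits with code 0 (combine with `--all` or `--indexes`; otherwise only one command is selected)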
+- **`--summary FILE.json`**: Write final execution summary to JSON file
+- **Command precedence**: `--indexes` takes precedence over `--all`, both override single `--index`
+
+**JSONL Output Format:**
+
+When using `--jsonl`, each executed command appends one `run` record, and a single `summary` record is appended after the last run:
+
+```jsonl
+{"type": "run", "index": 0, "cmd": "pytest", "start_ts": "2025-09-13T12:00:00Z", "end_ts": "2025-09-13T12:00:05Z", "exit_code": 1, "duration_ms": 5000}
+{"type": "run", "index": 1, "cmd": "pytest -v", "start_ts": "2025-09-13T12:00:05Z", "end_ts": "2025-09-13T12:00:08Z", "exit_code": 0, "duration_ms": 3000}
+{"type": "summary", "schema_version": 1, "tool": "autorepro", "runs": 2, "successes": 1, "first_success_index": 1}
+```
+
+**Execution Options:**
+
+- `--timeout N`: Command timeout in seconds (default: 120)
+- `--env KEY=VAL`: Set environment variable (repeatable)
+- `--env-file PATH`: Load environment variables from file
+- `--tee PATH`: Append full stdout/stderr to log file
+- `--jsonl PATH`: Stream JSONL records for each run and summary
+- `--dry-run`: Print selected commands without executing
+
+**Exit Behavior:**
+
+- **Single execution**: Returns the command's exit code
+- **Multi-execution**: Returns 0 if any command succeeded, otherwise the last exit code
+- **`--until-success`**: Returns 0 when a command succeeds, otherwise the last failure code
+
 ### Init Command
 
 Creates a devcontainer.json file with default configuration (Python 3.11, Node 20, Go 1.22). The command is idempotent and provides atomic file writes.
diff --git a/autorepro/cli.py b/autorepro/cli.py
index 6da21b3..58dbafb 100644
--- a/autorepro/cli.py
+++ b/autorepro/cli.py
@@ -450,6 +450,27 @@ def _setup_exec_parser(subparsers) -> argparse.ArgumentParser:
         action="store_true",
         help="Exit with code 1 if no commands make the cut after filtering",
     )
+
+    # Multi-execution arguments
+    exec_parser.add_argument(
+        "--all",
+        action="store_true",
+        help="Execute all candidate commands in order",
+    )
+    exec_parser.add_argument(
+        "--indexes",
+        help="Execute specific commands by indices/ranges (e.g., '0,2-3')",
+    )
+    exec_parser.add_argument(
+        "--until-success",
+        action="store_true",
+        help="Stop after the first command that exits with code 0",
+    )
+    exec_parser.add_argument(
+        "--summary",
+        help="Write final summary JSON to file",
+    )
+
     exec_parser.add_argument(
         "--profile",
         help="Named profile from .autorepro.toml to apply",
@@ -755,6 +776,12 @@ class ExecConfig:
     min_score: int = field(default_factory=lambda: config.limits.min_score_threshold)
     strict: bool = False
 
+    # Multi-execution fields
+    all: bool = False
+    indexes: str | None = None
+    until_success: bool = False
+    summary_path: str | None = None
+
     def validate(self) -> None:
         """Validate exec configuration and raise descriptive errors."""
         # Mutual exclusivity validation
@@ -768,6 +795,25 @@ def validate(self) -> None:
                 "Must specify either --desc or --file", field="desc,file"
             )
 
+        # Multi-execution validation
+        multi_exec_flags = sum(
+            [bool(self.all), bool(self.indexes), bool(self.until_success)]
+        )
+        if multi_exec_flags > 0:
+            # If using multi-execution, validate combinations
+            if self.all and self.indexes:
+                # indexes takes precedence, just warn
+                pass
+
+            # Validate indexes format if provided
+            if self.indexes:
+                try:
+                    self._parse_indexes(self.indexes)
+                except ValueError as e:
+                    raise FieldValidationError(
+                        f"invalid indexes format: {e}", field="indexes"
+                    ) from e
+
         # Field validation
         if self.timeout <= 0:
             raise FieldValidationError(
@@ -781,6 +827,45 @@ def validate(self) -> None:
 
         # File path validation is done later for proper I/O error handling
 
+    def _parse_indexes(self, indexes_str: str) -> list[int]:  # noqa: C901
+        """Parse an index spec such as '0,2-3' into sorted, unique integers.
+
+        Parts are single indices ('4') or inclusive ranges ('2-5'). Raises
+        ValueError if the spec is blank, malformed, or names no index (',').
+        """
+        if not indexes_str.strip():
+            raise ValueError("indexes string cannot be empty")
+
+        selected: set[int] = set()
+        for raw_part in indexes_str.split(","):
+            part = raw_part.strip()
+            if not part:
+                continue
+
+            if "-" not in part:
+                # Single index; any '-' sends the part to the range branch.
+                try:
+                    selected.add(int(part))
+                except ValueError:
+                    raise ValueError(f"invalid index: {part}") from None
+                continue
+
+            # Inclusive range "2-5"; only int() is guarded so bound errors survive.
+            start, end = part.split("-", 1)
+            try:
+                start_idx, end_idx = int(start.strip()), int(end.strip())
+            except ValueError as e:
+                raise ValueError(f"invalid range format: {part}") from e
+            if start_idx < 0 or end_idx < 0:
+                raise ValueError("indexes must be non-negative")
+            if start_idx > end_idx:
+                raise ValueError(f"invalid range {part}: start > end")
+            selected.update(range(start_idx, end_idx + 1))
+
+        if not selected:
+            raise ValueError("indexes string contains no indexes")
+        return sorted(selected)
+
 
 @dataclass
 @dataclass
@@ -1418,6 +1503,51 @@ def _generate_exec_suggestions(
     return suggestions, None
 
 
+def _resolve_command_selection(  # noqa: PLR0911
+    suggestions: list, config: ExecConfig
+) -> tuple[list[int] | None, int | None]:
+    """
+    Resolve which suggestion indices to run (--indexes > --all > --index).
+
+    Returns:
+        Tuple of (selected_indices, error_code). With no suggestions this is
+        ([], None). On failure selected_indices is None and error_code is 1
+        (unparseable --indexes) or 2 (an index outside the suggestions list).
+    """
+    if not suggestions:
+        return [], None
+
+    log = logging.getLogger("autorepro")
+    total = len(suggestions)
+
+    if config.indexes:
+        try:
+            requested_indices = config._parse_indexes(config.indexes)
+        except ValueError as e:
+            log.error(f"Invalid indexes format: {e}")
+            return None, 1
+        # Parsed indices are never negative, so only the upper bound matters.
+        invalid_indices = [i for i in requested_indices if i >= total]
+        if invalid_indices:
+            log.error(
+                f"Invalid indices {invalid_indices}: only {total} commands available (0-{total - 1})"
+            )
+            return None, 2
+        return requested_indices, None
+
+    if config.all:
+        return list(range(total)), None
+
+    # Single command execution (original behavior). Negative values are
+    # rejected too: suggestions[-1] would silently run the last command.
+    if not 0 <= config.index < total:
+        log.error(
+            f"Index {config.index} out of range: only {total} commands available"
+        )
+        return None, 2
+    return [config.index], None
+
+
 def _select_exec_command(
     suggestions: list, config: ExecConfig
 ) -> tuple[tuple | None, int | None]:
@@ -1599,7 +1729,7 @@ def _handle_exec_output_logging(results: dict, config: ExecConfig) -> None:
             log.error(f"Failed to write JSONL log: {e}")
 
 
-def _execute_exec_pipeline(config: ExecConfig) -> int:
+def _execute_exec_pipeline(config: ExecConfig) -> int:  # noqa: PLR0911
     """Execute the complete exec command pipeline."""
     # Validate configuration
     config.validate()
@@ -1620,22 +1750,35 @@ def _execute_exec_pipeline(config: ExecConfig) -> int:
     if error is not None:
         return error
 
-    # Select command by index
-    selected_command, error = _select_exec_command(suggestions, config)
+    # Resolve which commands to execute
+    selected_indices, error = _resolve_command_selection(suggestions, config)
     if error is not None:
         return error
-    assert (
-        selected_command is not None
-    )  # Type assertion - we know command is valid if no error
+    assert selected_indices is not None
 
-    command_str, score, rationale = selected_command
+    # Handle empty selection
+    if not selected_indices:
+        log = logging.getLogger("autorepro")
+        log.error("No commands to execute")
+        return 1
 
     # Handle dry-run
     if config.dry_run:
-        print(command_str)
+        for index in selected_indices:
+            command_str, score, rationale = suggestions[index]
+            print(f"[{index}] {command_str}")
         return 0
 
-    return _execute_exec_command_real(command_str, repo_path, config)
+    # Execute commands (single or multiple)
+    if len(selected_indices) == 1 and not (config.jsonl_path or config.summary_path):
+        # Single command execution (backward compatible)
+        command_str, score, rationale = suggestions[selected_indices[0]]
+        return _execute_exec_command_real(command_str, repo_path, config)
+    else:
+        # Multi-command execution with JSONL support
+        return _execute_multiple_commands(
+            suggestions, selected_indices, repo_path, config
+        )
 
 
 def _execute_exec_command_real(
@@ -1668,6 +1811,156 @@ def _execute_exec_command_real(
     return results["exit_code"]
 
 
+def _write_jsonl_record(file_path: str, record: dict) -> None:
+    """Append ``record`` to ``file_path`` as one JSON line; log on OSError."""
+    try:
+        with open(file_path, "a", encoding="utf-8") as jsonl_file:
+            serialized = json.dumps(record)
+            jsonl_file.write(f"{serialized}\n")
+    except OSError as e:
+        logging.getLogger("autorepro").error(f"Failed to write JSONL record: {e}")
+
+
+def _create_run_record(
+    index: int,
+    command_str: str,
+    results: dict,
+    start_time: datetime,
+    end_time: datetime,
+) -> dict:
+    """
+    Build the "run" JSONL record for one executed command.
+
+    Timestamps are rendered as given with a literal "Z" suffix (no timezone
+    conversion is applied). stdout_path/stderr_path keys are included only
+    when results carries a truthy value for them.
+    """
+    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
+    record = {
+        "type": "run",
+        "index": index,
+        "cmd": command_str,
+        "start_ts": start_time.strftime(timestamp_format),
+        "end_ts": end_time.strftime(timestamp_format),
+        "exit_code": results["exit_code"],
+        "duration_ms": results["duration_ms"],
+    }
+    for path_key in ("stdout_path", "stderr_path"):
+        if results.get(path_key):
+            record[path_key] = str(results[path_key])
+    return record
+
+
+def _create_summary_record(
+    runs: int, successes: int, first_success_index: int | None
+) -> dict:
+    """Build the final "summary" record (schema_version 1) from run totals."""
+    summary: dict = {"type": "summary", "schema_version": 1}
+    summary["tool"] = "autorepro"
+    summary.update(
+        runs=runs,
+        successes=successes,
+        first_success_index=first_success_index,
+    )
+    return summary
+
+
+def _execute_multiple_commands(  # noqa: C901, PLR0912
+    suggestions: list,
+    selected_indices: list[int],
+    repo_path: Path | None,
+    config: ExecConfig,
+) -> int:
+    """
+    Execute the selected commands in order and emit JSONL/summary output.
+
+    Output of each command is echoed to the console. config.jsonl_path gets a
+    "run" record per command plus a final "summary" record; config.summary_path
+    gets the summary as JSON. config.until_success stops after the first exit 0.
+
+    Returns:
+        0 if any command succeeded, otherwise the last exit code. Setup or
+        execution errors return at once (no summary); 1 if the summary file fails.
+    """
+    # Local import: run records carry a "Z" (UTC) suffix, so the clock must
+    # be read in UTC; naive local time would be off by the local UTC offset.
+    from datetime import timezone
+
+    log = logging.getLogger("autorepro")
+
+    env, error = _prepare_exec_environment(config)
+    if error is not None:
+        return error
+    assert env is not None
+
+    exec_dir = repo_path if repo_path else Path.cwd()
+
+    runs = 0
+    successes = 0
+    first_success_index = None
+    last_exit_code = 0
+
+    for suggestion_index in selected_indices:
+        command_str, _score, _rationale = suggestions[suggestion_index]
+
+        log.info(f"Executing command {suggestion_index}: {command_str}")
+
+        start_time = datetime.now(timezone.utc)
+        results, error = _execute_command(command_str, env, exec_dir, config)
+        if error is not None:
+            return error
+        end_time = datetime.now(timezone.utc)
+
+        runs += 1
+        exit_code = results["exit_code"]
+        last_exit_code = exit_code
+
+        if exit_code == 0:
+            successes += 1
+            if first_success_index is None:
+                first_success_index = suggestion_index
+
+        # Write JSONL record if requested
+        if config.jsonl_path:
+            run_record = _create_run_record(
+                suggestion_index, command_str, results, start_time, end_time
+            )
+            _write_jsonl_record(config.jsonl_path, run_record)
+
+        # Handle output logging (for --tee only, not JSONL in multi-execution mode)
+        # NOTE(review): config still carries jsonl_path here; confirm the helper
+        # does not also append its own record to that file.
+        if config.tee_path:
+            _handle_exec_output_logging(results, config)
+
+        # Echo captured output to the console (no quiet option is consulted)
+        if results["stdout_full"]:
+            print(results["stdout_full"], end="")
+        if results["stderr_full"]:
+            print(results["stderr_full"], file=sys.stderr, end="")
+
+        # Check for early stopping
+        if config.until_success and exit_code == 0:
+            log.info(f"Stopping after first success (command {suggestion_index})")
+            break
+
+    summary_record = _create_summary_record(runs, successes, first_success_index)
+
+    if config.jsonl_path:
+        _write_jsonl_record(config.jsonl_path, summary_record)
+
+    if config.summary_path:
+        try:
+            with open(config.summary_path, "w", encoding="utf-8") as f:
+                json.dump(summary_record, f, indent=2)
+        except OSError as e:
+            log.error(f"Failed to write summary file: {e}")
+            return 1
+
+    return 0 if successes > 0 else last_exit_code
+
+
 def cmd_exec(config: ExecConfig | None = None, **kwargs) -> int:
     """Handle the exec command."""
     # Support backward compatibility with individual parameters
@@ -1686,6 +1979,10 @@ def cmd_exec(config: ExecConfig | None = None, **kwargs) -> int:
             dry_run=kwargs.get("dry_run", False),
             min_score=kwargs.get("min_score", global_config.limits.min_score_threshold),
             strict=kwargs.get("strict", False),
+            all=kwargs.get("all", False),
+            indexes=kwargs.get("indexes"),
+            until_success=kwargs.get("until_success", False),
+            summary_path=kwargs.get("summary_path"),
         )
 
     try:
@@ -2032,6 +2329,10 @@ def _dispatch_exec_command(args) -> int:
         dry_run=args.dry_run,
         min_score=effective_min,
         strict=effective_strict,
+        all=getattr(args, "all", False),
+        indexes=getattr(args, "indexes", None),
+        until_success=getattr(args, "until_success", False),
+        summary_path=getattr(args, "summary", None),
     )
 
 
diff --git a/runs.jsonl b/runs.jsonl
deleted file mode 100644
index 444daae..0000000
--- a/runs.jsonl
+++ /dev/null
@@ -1 +0,0 @@
-{"schema_version": 1, "tool": "autorepro", "tool_version": "0.0.1", "cmd": "python -c \"import time, sys; time.sleep(3); sys.exit(0)\"", "index": 0, "cwd": "/Users/ali/autorepro", "start": "2025-09-01T10:35:50Z", "duration_ms": 1004, "exit_code": 124, "timed_out": true, "stdout_preview": "", "stderr_preview": "Command timed out after 1 seconds"}
diff --git a/tests/test_exec_cli.py b/tests/test_exec_cli.py
index 8827135..b6f37ed 100644
--- a/tests/test_exec_cli.py
+++ b/tests/test_exec_cli.py
@@ -113,18 +113,28 @@ def test_exec_with_python_repo_executes_command(self):
             # Should have created JSONL log
             assert jsonl_path.exists()
 
-            # Parse JSONL record
+            # Parse JSONL records (multi-execution format)
             with open(jsonl_path) as f:
-                record = json.loads(f.read().strip())
-
-            assert record["schema_version"] == 1
-            assert record["tool"] == "autorepro"
-            assert "cmd" in record
-            assert "index" in record
-            assert "cwd" in record
-            assert "start" in record
-            assert "duration_ms" in record
-            assert "exit_code" in record
+                lines = f.read().strip().split("\n")
+
+            # Should have at least a run record and summary record
+            assert len(lines) >= 2
+
+            # Parse the first line (run record)
+            run_record = json.loads(lines[0])
+            assert run_record["type"] == "run"
+            assert "cmd" in run_record
+            assert "index" in run_record
+            assert "start_ts" in run_record
+            assert "end_ts" in run_record
+            assert "duration_ms" in run_record
+            assert "exit_code" in run_record
+
+            # Parse the last line (summary record)
+            summary_record = json.loads(lines[-1])
+            assert summary_record["type"] == "summary"
+            assert summary_record["schema_version"] == 1
+            assert summary_record["tool"] == "autorepro"
 
     def test_exec_index_selects_command(self):
         """Test --index selects the N-th command."""
@@ -166,8 +176,11 @@ def test_exec_index_selects_command(self):
             # Check JSONL record has correct index
             if jsonl_path.exists():
                 with open(jsonl_path) as f:
-                    record = json.loads(f.read().strip())
-                assert record["index"] == 1
+                    lines = f.read().strip().split("\n")
+                # Parse the first line (run record)
+                run_record = json.loads(lines[0])
+                assert run_record["type"] == "run"
+                assert run_record["index"] == 1
 
     def test_exec_strict_empty_exits_with_error(self):
         """Test --strict with no commands exits 1."""
diff --git a/tests/test_exec_multi.py b/tests/test_exec_multi.py
new file mode 100644
index 0000000..c762148
--- /dev/null
+++ b/tests/test_exec_multi.py
@@ -0,0 +1,533 @@
+"""Tests for multi-execution functionality in exec command."""
+
+import json
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from autorepro.cli import (
+    ExecConfig,
+    _create_run_record,
+    _create_summary_record,
+    _resolve_command_selection,
+)
+from autorepro.config.exceptions import FieldValidationError
+
+
+class TestExecConfigValidation:
+    """Test validation of multi-execution fields in ExecConfig."""
+
+    def test_valid_multi_execution_config(self):
+        """Test valid multi-execution configurations."""
+        # Valid --all
+        config = ExecConfig(desc="test", all=True)
+        config.validate()  # Should not raise
+
+        # Valid --indexes
+        config = ExecConfig(desc="test", indexes="0,2-4")
+        config.validate()  # Should not raise
+
+        # Valid --until-success
+        config = ExecConfig(desc="test", until_success=True)
+        config.validate()  # Should not raise
+
+    def test_indexes_parsing_valid_formats(self):
+        """Test parsing of valid indexes formats."""
+        config = ExecConfig(desc="test", indexes="0")
+        assert config._parse_indexes("0") == [0]
+
+        config = ExecConfig(desc="test", indexes="0,2,4")
+        assert config._parse_indexes("0,2,4") == [0, 2, 4]
+
+        config = ExecConfig(desc="test", indexes="0-3")
+        assert config._parse_indexes("0-3") == [0, 1, 2, 3]
+
+        config = ExecConfig(desc="test", indexes="0,2-4,6")
+        assert config._parse_indexes("0,2-4,6") == [0, 2, 3, 4, 6]
+
+        # Test duplicate removal and sorting
+        config = ExecConfig(desc="test", indexes="3,1,2,1")
+        assert config._parse_indexes("3,1,2,1") == [1, 2, 3]
+
+    def test_indexes_parsing_invalid_formats(self):
+        """Test parsing of invalid indexes formats."""
+        config = ExecConfig(desc="test", indexes="   ")
+
+        # Blank string ("" itself is falsy and skips indexes validation)
+        with pytest.raises(FieldValidationError, match="invalid indexes format"):
+            config.validate()
+
+        # Invalid index
+        config.indexes = "abc"
+        with pytest.raises(FieldValidationError, match="invalid indexes format"):
+            config.validate()
+
+        # Negative index
+        config.indexes = "-1"
+        with pytest.raises(FieldValidationError, match="invalid indexes format"):
+            config.validate()
+
+        # Invalid range
+        config.indexes = "3-1"  # start > end
+        with pytest.raises(FieldValidationError, match="invalid indexes format"):
+            config.validate()
+
+        # Invalid range format
+        config.indexes = "1-2-3"
+        with pytest.raises(FieldValidationError, match="invalid indexes format"):
+            config.validate()
+
+
+class TestCommandSelection:
+    """Test command selection logic for multi-execution."""
+
+    def test_resolve_command_selection_single_index(self):
+        """Test single index selection (default behavior)."""
+        suggestions = [
+            ("cmd1", 5, "rationale1"),
+            ("cmd2", 4, "rationale2"),
+            ("cmd3", 3, "rationale3"),
+        ]
+        config = ExecConfig(desc="test", index=1)
+
+        selected_indices, error = _resolve_command_selection(suggestions, config)
+
+        assert error is None
+        assert selected_indices == [1]
+
+    def test_resolve_command_selection_all(self):
+        """Test --all flag selection."""
+        suggestions = [
+            ("cmd1", 5, "rationale1"),
+            ("cmd2", 4, "rationale2"),
+            ("cmd3", 3, "rationale3"),
+        ]
+        config = ExecConfig(desc="test", all=True)
+
+        selected_indices, error = _resolve_command_selection(suggestions, config)
+
+        assert error is None
+        assert selected_indices == [0, 1, 2]
+
+    def test_resolve_command_selection_indexes(self):
+        """Test --indexes flag selection."""
+        suggestions = [
+            ("cmd1", 5, "rationale1"),
+            ("cmd2", 4, "rationale2"),
+            ("cmd3", 3, "rationale3"),
+        ]
+        config = ExecConfig(desc="test", indexes="0,2")
+
+        selected_indices, error = _resolve_command_selection(suggestions, config)
+
+        assert error is None
+        assert selected_indices == [0, 2]
+
+    def test_resolve_command_selection_indexes_precedence(self):
+        """Test that --indexes takes precedence over --all."""
+        suggestions = [
+            ("cmd1", 5, "rationale1"),
+            ("cmd2", 4, "rationale2"),
+            ("cmd3", 3, "rationale3"),
+        ]
+        config = ExecConfig(desc="test", all=True, indexes="1")
+
+        selected_indices, error = _resolve_command_selection(suggestions, config)
+
+        assert error is None
+        assert selected_indices == [1]  # indexes takes precedence
+
+    def test_resolve_command_selection_index_out_of_range(self):
+        """Test error handling for out-of-range indices."""
+        suggestions = [("cmd1", 5, "rationale1"), ("cmd2", 4, "rationale2")]
+        config = ExecConfig(desc="test", index=5)
+
+        selected_indices, error = _resolve_command_selection(suggestions, config)
+
+        assert error == 2
+        assert selected_indices is None
+
+    def test_resolve_command_selection_indexes_out_of_range(self):
+        """Test error handling for out-of-range indexes in --indexes."""
+        suggestions = [("cmd1", 5, "rationale1"), ("cmd2", 4, "rationale2")]
+        config = ExecConfig(desc="test", indexes="0,5")
+
+        selected_indices, error = _resolve_command_selection(suggestions, config)
+
+        assert error == 2
+        assert selected_indices is None
+
+    def test_resolve_command_selection_empty_suggestions(self):
+        """Test handling of empty suggestions list."""
+        suggestions = []
+        config = ExecConfig(desc="test", all=True)
+
+        selected_indices, error = _resolve_command_selection(suggestions, config)
+
+        assert error is None
+        assert selected_indices == []
+
+
+class TestJSONLRecords:
+    """Test JSONL record creation functions."""
+
+    def test_create_run_record(self):
+        """Test creation of run records."""
+        results = {
+            "exit_code": 0,
+            "duration_ms": 1234,
+            "stdout_path": "/tmp/stdout.txt",
+            "stderr_path": "/tmp/stderr.txt",
+        }
+        start_time = datetime(2025, 9, 13, 12, 0, 0)
+        end_time = datetime(2025, 9, 13, 12, 0, 1)
+
+        record = _create_run_record(2, "test command", results, start_time, end_time)
+
+        assert record["type"] == "run"
+        assert record["index"] == 2
+        assert record["cmd"] == "test command"
+        assert record["start_ts"] == "2025-09-13T12:00:00Z"
+        assert record["end_ts"] == "2025-09-13T12:00:01Z"
+        assert record["exit_code"] == 0
+        assert record["duration_ms"] == 1234
+        assert record["stdout_path"] == "/tmp/stdout.txt"
+        assert record["stderr_path"] == "/tmp/stderr.txt"
+
+    def test_create_run_record_minimal(self):
+        """Test creation of run records with minimal fields."""
+        results = {
+            "exit_code": 1,
+            "duration_ms": 500,
+        }
+        start_time = datetime(2025, 9, 13, 12, 0, 0)
+        end_time = datetime(2025, 9, 13, 12, 0, 1)
+
+        record = _create_run_record(0, "test command", results, start_time, end_time)
+
+        assert record["type"] == "run"
+        assert record["index"] == 0
+        assert record["cmd"] == "test command"
+        assert record["exit_code"] == 1
+        assert record["duration_ms"] == 500
+        assert "stdout_path" not in record
+        assert "stderr_path" not in record
+
+    def test_create_summary_record(self):
+        """Test creation of summary records."""
+        record = _create_summary_record(5, 2, 1)
+
+        assert record["type"] == "summary"
+        assert record["schema_version"] == 1
+        assert record["tool"] == "autorepro"
+        assert record["runs"] == 5
+        assert record["successes"] == 2
+        assert record["first_success_index"] == 1
+
+    def test_create_summary_record_no_successes(self):
+        """Test creation of summary records with no successes."""
+        record = _create_summary_record(3, 0, None)
+
+        assert record["type"] == "summary"
+        assert record["runs"] == 3
+        assert record["successes"] == 0
+        assert record["first_success_index"] is None
+
+
+class TestMultiExecutionIntegration:
+    """Integration tests for multi-execution functionality."""
+
+    @patch("autorepro.cli._handle_exec_output_logging")
+    @patch("autorepro.cli._execute_command")
+    @patch("autorepro.cli._prepare_exec_environment")
+    def test_multi_execution_all_commands(self, mock_env, mock_execute, mock_logging):
+        """Test executing all commands with --all flag."""
+        from autorepro.cli import _execute_multiple_commands
+
+        # Mock environment setup
+        mock_env.return_value = ({"PATH": "/usr/bin"}, None)
+
+        # Mock command executions
+        mock_execute.side_effect = [
+            (
+                {
+                    "exit_code": 1,
+                    "duration_ms": 100,
+                    "stdout_full": "",
+                    "stderr_full": "",
+                },
+                None,
+            ),
+            (
+                {
+                    "exit_code": 0,
+                    "duration_ms": 200,
+                    "stdout_full": "success",
+                    "stderr_full": "",
+                },
+                None,
+            ),
+            (
+                {
+                    "exit_code": 2,
+                    "duration_ms": 150,
+                    "stdout_full": "",
+                    "stderr_full": "error",
+                },
+                None,
+            ),
+        ]
+
+        suggestions = [
+            ("python3 -c 'import sys; sys.exit(1)'", 9, "rationale1"),
+            ("python3 -c 'import sys; sys.exit(0)'", 8, "rationale2"),
+            ("python3 -c 'import sys; sys.exit(2)'", 7, "rationale3"),
+        ]
+        selected_indices = [0, 1, 2]
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            jsonl_path = Path(tmpdir) / "runs.jsonl"
+            summary_path = Path(tmpdir) / "summary.json"
+            config = ExecConfig(
+                desc="test",
+                all=True,
+                jsonl_path=str(jsonl_path),
+                summary_path=str(summary_path),
+            )
+
+            exit_code = _execute_multiple_commands(
+                suggestions, selected_indices, None, config
+            )
+
+            # Should return 0 because one command succeeded
+            assert exit_code == 0
+
+            # Check JSONL output
+            assert jsonl_path.exists()
+            lines = jsonl_path.read_text().strip().split("\n")
+            assert len(lines) == 4  # 3 run records + 1 summary record
+
+            # Check run records
+            run1 = json.loads(lines[0])
+            assert run1["type"] == "run"
+            assert run1["index"] == 0
+            assert run1["exit_code"] == 1
+
+            run2 = json.loads(lines[1])
+            assert run2["type"] == "run"
+            assert run2["index"] == 1
+            assert run2["exit_code"] == 0
+
+            run3 = json.loads(lines[2])
+            assert run3["type"] == "run"
+            assert run3["index"] == 2
+            assert run3["exit_code"] == 2
+
+            # Check summary record
+            summary = json.loads(lines[3])
+            assert summary["type"] == "summary"
+            assert summary["runs"] == 3
+            assert summary["successes"] == 1
+            assert summary["first_success_index"] == 1
+
+            # Check summary file
+            assert summary_path.exists()
+            summary_file = json.loads(summary_path.read_text())
+            assert summary_file == summary
+
+    @patch("autorepro.cli._handle_exec_output_logging")  # outermost -> mock_logging
+    @patch("autorepro.cli._execute_command")  # -> mock_execute
+    @patch("autorepro.cli._prepare_exec_environment")  # innermost -> mock_env
+    def test_multi_execution_until_success(self, mock_env, mock_execute, mock_logging):
+        """Test that --until-success stops execution after the first exit code 0."""
+        from autorepro.cli import _execute_multiple_commands
+
+        # Stub environment setup: (env dict, None); None presumably means "no error"
+        mock_env.return_value = ({"PATH": "/usr/bin"}, None)
+
+        # Only two results queued (fail, success): a third call raises StopIteration
+        mock_execute.side_effect = [
+            (
+                {
+                    "exit_code": 1,
+                    "duration_ms": 100,
+                    "stdout_full": "",
+                    "stderr_full": "",
+                },
+                None,
+            ),
+            (
+                {
+                    "exit_code": 0,
+                    "duration_ms": 200,
+                    "stdout_full": "success",
+                    "stderr_full": "",
+                },
+                None,
+            ),
+        ]
+
+        suggestions = [
+            ("python3 -c 'import sys; sys.exit(1)'", 9, "rationale1"),
+            ("python3 -c 'import sys; sys.exit(0)'", 8, "rationale2"),
+            ("python3 -c 'import sys; sys.exit(2)'", 7, "rationale3"),
+        ]
+        selected_indices = [0, 1, 2]  # All three, but should stop after second
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            jsonl_path = Path(tmpdir) / "runs.jsonl"
+            config = ExecConfig(
+                desc="test", all=True, until_success=True, jsonl_path=str(jsonl_path)
+            )
+
+            exit_code = _execute_multiple_commands(
+                suggestions, selected_indices, None, config
+            )
+
+            # Should return 0 because a command succeeded
+            assert exit_code == 0
+
+            # Third selected command must not run: 2 run records + 1 summary only
+            lines = jsonl_path.read_text().strip().split("\n")
+            assert len(lines) == 3  # 2 run records + 1 summary record
+
+            # The summary is the last JSONL line and reflects the early stop
+            summary = json.loads(lines[2])
+            assert summary["runs"] == 2
+            assert summary["successes"] == 1
+            assert summary["first_success_index"] == 1
+
+    @patch("autorepro.cli._handle_exec_output_logging")  # outermost -> mock_logging
+    @patch("autorepro.cli._execute_command")  # -> mock_execute
+    @patch("autorepro.cli._prepare_exec_environment")  # innermost -> mock_env
+    def test_multi_execution_indexes(self, mock_env, mock_execute, mock_logging):
+        """Test that only the commands at the selected indices are executed."""
+        from autorepro.cli import _execute_multiple_commands
+
+        # Stub environment setup: (env dict, None); None presumably means "no error"
+        mock_env.return_value = ({"PATH": "/usr/bin"}, None)
+
+        # Queued results for indices 1 and 2, in call order: success, then exit code 2
+        mock_execute.side_effect = [
+            (
+                {
+                    "exit_code": 0,
+                    "duration_ms": 100,
+                    "stdout_full": "success",
+                    "stderr_full": "",
+                },
+                None,
+            ),
+            (
+                {
+                    "exit_code": 2,
+                    "duration_ms": 150,
+                    "stdout_full": "",
+                    "stderr_full": "error",
+                },
+                None,
+            ),
+        ]
+
+        suggestions = [
+            ("python3 -c 'import sys; sys.exit(1)'", 9, "rationale1"),
+            ("python3 -c 'import sys; sys.exit(0)'", 8, "rationale2"),
+            ("python3 -c 'import sys; sys.exit(2)'", 7, "rationale3"),
+        ]
+        selected_indices = [1, 2]  # Only execute commands at indices 1 and 2
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            jsonl_path = Path(tmpdir) / "runs.jsonl"
+            config = ExecConfig(desc="test", indexes="1,2", jsonl_path=str(jsonl_path))
+
+            exit_code = _execute_multiple_commands(
+                suggestions, selected_indices, None, config
+            )
+
+            # Should return 0 because first selected command succeeded
+            assert exit_code == 0
+
+            # Check JSONL output
+            lines = jsonl_path.read_text().strip().split("\n")
+            assert len(lines) == 3  # 2 run records + 1 summary record
+
+            # Run records carry the suggestion index (1, 2), not the run position
+            run1 = json.loads(lines[0])
+            assert run1["index"] == 1
+            assert run1["exit_code"] == 0
+
+            run2 = json.loads(lines[1])
+            assert run2["index"] == 2
+            assert run2["exit_code"] == 2
+
+            summary = json.loads(lines[2])
+            assert summary["runs"] == 2
+            assert summary["successes"] == 1
+            assert summary["first_success_index"] == 1
+
+
+class TestBackwardCompatibility:
+    """Test that multi-execution doesn't break existing functionality."""
+
+    def test_single_command_execution_unchanged(self):
+        """Test that without multi-execution flags the single-command path is used."""
+        from autorepro.cli import _execute_exec_pipeline
+
+        with patch("autorepro.cli._validate_exec_repo_path") as mock_repo:
+            with patch("autorepro.cli._read_exec_input_text") as mock_text:
+                with patch(
+                    "autorepro.cli._generate_exec_suggestions"
+                ) as mock_suggestions:
+                    with patch("autorepro.cli._execute_exec_command_real") as mock_exec:
+                        # Stub pipeline steps: (value, None) pairs; executor returns 0
+                        mock_repo.return_value = (Path("/tmp"), None)
+                        mock_text.return_value = ("test description", None)
+                        mock_suggestions.return_value = (
+                            [("cmd1", 5, "rationale")],
+                            None,
+                        )
+                        mock_exec.return_value = 0
+
+                        config = ExecConfig(
+                            desc="test", index=0
+                        )  # No multi-execution flags
+
+                        exit_code = _execute_exec_pipeline(config)
+
+                        assert exit_code == 0
+                        # The single-command executor must be hit exactly once
+                        mock_exec.assert_called_once()
+
+    def test_jsonl_triggers_multi_execution_path(self):
+        """Test that --jsonl triggers multi-execution path even for single command."""
+        from autorepro.cli import _execute_exec_pipeline
+
+        with patch("autorepro.cli._validate_exec_repo_path") as mock_repo:
+            with patch("autorepro.cli._read_exec_input_text") as mock_text:
+                with patch(
+                    "autorepro.cli._generate_exec_suggestions"
+                ) as mock_suggestions:
+                    with patch(
+                        "autorepro.cli._execute_multiple_commands"
+                    ) as mock_multi_exec:
+                        # Stub pipeline steps: (value, None) pairs; multi-exec gives 0
+                        mock_repo.return_value = (Path("/tmp"), None)
+                        mock_text.return_value = ("test description", None)
+                        mock_suggestions.return_value = (
+                            [("cmd1", 5, "rationale")],
+                            None,
+                        )
+                        mock_multi_exec.return_value = 0
+
+                        config = ExecConfig(
+                            desc="test", index=0, jsonl_path="runs.jsonl"
+                        )
+
+                        exit_code = _execute_exec_pipeline(config)
+
+                        assert exit_code == 0
+                        # Setting jsonl_path alone must route to the multi path
+                        mock_multi_exec.assert_called_once()