diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0a8fd42..8675e62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: run: python -m ruff check . - name: Mypy (type check) - run: mypy || true + run: mypy autorepro tests || true - name: Export PYTHON_BIN run: echo "PYTHON_BIN=$(which python)" >> $GITHUB_ENV diff --git a/README.md b/README.md index 2e7364e..bff9e87 100644 --- a/README.md +++ b/README.md @@ -291,6 +291,69 @@ $ autorepro scan --json --show 3 - **Python**: `pyproject.toml`, `setup.py`, `requirements.txt`, `*.py` - **Rust**: `Cargo.toml`, `Cargo.lock`, `*.rs` +### Exec Command (Multi-execution Support) + +Executes commands from a reproduction plan with support for single or multiple command execution, early stopping, and structured logging. + +```bash +# Execute single command (default behavior) +$ autorepro exec --desc "pytest failing" +# Executes the top-ranked command + +# Execute all candidate commands in order +$ autorepro exec --desc "pytest failing" --all +# Runs all suggested commands sequentially + +# Execute specific commands by index +$ autorepro exec --desc "test issues" --indexes "0,2-4" +# Runs commands at indices 0, 2, 3, and 4 + +# Stop at first successful command +$ autorepro exec --desc "build problems" --until-success +# Stops execution after first command with exit code 0 + +# Multi-execution with JSONL logging +$ autorepro exec --desc "CI tests" --all --jsonl runs.jsonl --summary summary.json + +# Dry-run to preview selected commands +$ autorepro exec --desc "npm test" --indexes "1,3" --dry-run +[1] npm test --verbose +[3] npm run test:unit +``` + +**Multi-execution Features:** + +- **`--all`**: Execute all candidate commands in their original order +- **`--indexes "N,M-P"`**: Execute specific commands by indices/ranges (comma-separated) +- **`--until-success`**: Stop after the first command that exits with code 0 +- **`--summary FILE.json`**: Write final execution summary to JSON file +- **Command precedence**: `--indexes` takes precedence over `--all`, both override single `--index` + +**JSONL Output Format:** + +When using `--jsonl`, each execution produces a run record followed by a final summary: + +```jsonl +{"type": "run", "index": 0, "cmd": "pytest", "start_ts": "2025-09-13T12:00:00Z", "end_ts": "2025-09-13T12:00:05Z", "exit_code": 1, "duration_ms": 5000} +{"type": "run", "index": 1, "cmd": "pytest -v", "start_ts": "2025-09-13T12:00:05Z", "end_ts": "2025-09-13T12:00:08Z", "exit_code": 0, "duration_ms": 3000} +{"type": "summary", "schema_version": 1, "tool": "autorepro", "runs": 2, "successes": 1, "first_success_index": 1} +``` + +**Execution Options:** + +- `--timeout N`: Command timeout in seconds (default: 120) +- `--env KEY=VAL`: Set environment variable (repeatable) +- `--env-file PATH`: Load environment variables from file +- `--tee PATH`: Append full stdout/stderr to log file +- `--jsonl PATH`: Stream JSONL records for each run and summary +- `--dry-run`: Print selected commands without executing + +**Exit Behavior:** + +- **Single execution**: Returns the command's exit code +- **Multi-execution**: Returns 0 if any command succeeded, otherwise the last exit code +- **`--until-success`**: Returns 0 when a command succeeds, otherwise the last failure code + ### Init Command Creates a devcontainer.json file with default configuration (Python 3.11, Node 20, Go 1.22). The command is idempotent and provides atomic file writes. diff --git a/autorepro/cli.py b/autorepro/cli.py index 6da21b3..58dbafb 100644 --- a/autorepro/cli.py +++ b/autorepro/cli.py @@ -450,6 +450,27 @@ def _setup_exec_parser(subparsers) -> argparse.ArgumentParser: action="store_true", help="Exit with code 1 if no commands make the cut after filtering", ) + + # Multi-execution arguments + exec_parser.add_argument( + "--all", + action="store_true", + help="Execute all candidate commands in order", + ) + exec_parser.add_argument( + "--indexes", + help="Execute specific commands by indices/ranges (e.g., '0,2-3')", + ) + exec_parser.add_argument( + "--until-success", + action="store_true", + help="Stop after the first command that exits with code 0", + ) + exec_parser.add_argument( + "--summary", + help="Write final summary JSON to file", + ) + exec_parser.add_argument( "--profile", help="Named profile from .autorepro.toml to apply", @@ -755,6 +776,12 @@ class ExecConfig: min_score: int = field(default_factory=lambda: config.limits.min_score_threshold) strict: bool = False + # Multi-execution fields + all: bool = False + indexes: str | None = None + until_success: bool = False + summary_path: str | None = None + def validate(self) -> None: """Validate exec configuration and raise descriptive errors.""" # Mutual exclusivity validation @@ -768,6 +795,25 @@ def validate(self) -> None: "Must specify either --desc or --file", field="desc,file" ) + # Multi-execution validation + multi_exec_flags = sum( + [bool(self.all), bool(self.indexes), bool(self.until_success)] + ) + if multi_exec_flags > 0: + # If using multi-execution, validate combinations + if self.all and self.indexes: + # indexes takes precedence, just warn + pass + + # Validate indexes format if provided + if self.indexes: + try: + self._parse_indexes(self.indexes) + except ValueError as e: + raise FieldValidationError( + f"invalid indexes format: {e}", field="indexes" + ) from e + # Field validation if self.timeout <= 0: raise FieldValidationError( @@ -781,6 +827,45 @@ def validate(self) -> None: # File path validation is done later for proper I/O error handling + def _parse_indexes(self, indexes_str: str) -> list[int]: # noqa: C901 + """Parse indexes string like '0,2-3' into list of integers.""" + if not indexes_str.strip(): + raise ValueError("indexes string cannot be empty") + + result: list[int] = [] + for part in indexes_str.split(","): + part = part.strip() + if not part: + continue + + if "-" in part: + # Range like "2-5" + try: + start, end = part.split("-", 1) + start_idx = int(start.strip()) + end_idx = int(end.strip()) + if start_idx < 0 or end_idx < 0: + raise ValueError("indexes must be non-negative") + if start_idx > end_idx: + raise ValueError(f"invalid range {part}: start > end") + result.extend(range(start_idx, end_idx + 1)) + except ValueError as e: + if "invalid range" in str(e): + raise + raise ValueError(f"invalid range format: {part}") from e + else: + # Single index + try: + idx = int(part) + if idx < 0: + raise ValueError("indexes must be non-negative") + result.append(idx) + except ValueError: + raise ValueError(f"invalid index: {part}") from None + + # Remove duplicates and sort + return sorted(set(result)) + @dataclass @dataclass @@ -1418,6 +1503,51 @@ def _generate_exec_suggestions( return suggestions, None +def _resolve_command_selection( # noqa: PLR0911 + suggestions: list, config: ExecConfig +) -> tuple[list[int] | None, int | None]: + """ + Resolve which command indices to execute based on config. + + Returns: + Tuple of (selected_indices, error_code). If error_code is not None, should return it. + """ + if not suggestions: + return [], None + + # Determine selection mode + if config.indexes: + # --indexes takes precedence + try: + requested_indices = config._parse_indexes(config.indexes) + # Validate indices are within range + max_index = len(suggestions) - 1 + invalid_indices = [i for i in requested_indices if i > max_index] + if invalid_indices: + log = logging.getLogger("autorepro") + log.error( + f"Invalid indices {invalid_indices}: only {len(suggestions)} commands available (0-{max_index})" + ) + return None, 2 + return requested_indices, None + except ValueError as e: + log = logging.getLogger("autorepro") + log.error(f"Invalid indexes format: {e}") + return None, 1 + elif config.all: + # --all executes all commands + return list(range(len(suggestions))), None + else: + # Single command execution (original behavior) + if config.index >= len(suggestions): + log = logging.getLogger("autorepro") + log.error( + f"Index {config.index} out of range: only {len(suggestions)} commands available" + ) + return None, 2 + return [config.index], None + + def _select_exec_command( suggestions: list, config: ExecConfig ) -> tuple[tuple | None, int | None]: @@ -1599,7 +1729,7 @@ def _handle_exec_output_logging(results: dict, config: ExecConfig) -> None: log.error(f"Failed to write JSONL log: {e}") -def _execute_exec_pipeline(config: ExecConfig) -> int: +def _execute_exec_pipeline(config: ExecConfig) -> int: # noqa: PLR0911 """Execute the complete exec command pipeline.""" # Validate configuration config.validate() @@ -1620,22 +1750,35 @@ def _execute_exec_pipeline(config: ExecConfig) -> int: if error is not None: return error - # Select command by index - selected_command, error = _select_exec_command(suggestions, config) + # Resolve which commands to execute + selected_indices, error = _resolve_command_selection(suggestions, config) if error is not None: return error - assert ( - selected_command is not None - ) # Type assertion - we know command is valid if no error + assert selected_indices is not None - command_str, score, rationale = selected_command + # Handle empty selection + if not selected_indices: + log = logging.getLogger("autorepro") + log.error("No commands to execute") + return 1 # Handle dry-run if config.dry_run: - print(command_str) + for index in selected_indices: + command_str, score, rationale = suggestions[index] + print(f"[{index}] {command_str}") return 0 - return _execute_exec_command_real(command_str, repo_path, config) + # Execute commands (single or multiple) + if len(selected_indices) == 1 and not (config.jsonl_path or config.summary_path): + # Single command execution (backward compatible) + command_str, score, rationale = suggestions[selected_indices[0]] + return _execute_exec_command_real(command_str, repo_path, config) + else: + # Multi-command execution with JSONL support + return _execute_multiple_commands( + suggestions, selected_indices, repo_path, config + ) def _execute_exec_command_real( @@ -1668,6 +1811,156 @@ def _execute_exec_command_real( return results["exit_code"] +def _write_jsonl_record(file_path: str, record: dict) -> None: + """Write a single JSONL record to file.""" + try: + with open(file_path, "a", encoding="utf-8") as f: + f.write(json.dumps(record) + "\n") + except OSError as e: + log = logging.getLogger("autorepro") + log.error(f"Failed to write JSONL record: {e}") + + +def _create_run_record( + index: int, + command_str: str, + results: dict, + start_time: datetime, + end_time: datetime, +) -> dict: + """Create a run record for JSONL output.""" + return { + "type": "run", + "index": index, + "cmd": command_str, + "start_ts": start_time.strftime("%Y-%m-%dT%H:%M:%SZ"), + "end_ts": end_time.strftime("%Y-%m-%dT%H:%M:%SZ"), + "exit_code": results["exit_code"], + "duration_ms": results["duration_ms"], + # Optional fields for output paths if they exist + **( + {} + if not results.get("stdout_path") + else {"stdout_path": str(results["stdout_path"])} + ), + **( + {} + if not results.get("stderr_path") + else {"stderr_path": str(results["stderr_path"])} + ), + } + + +def _create_summary_record( + runs: int, successes: int, first_success_index: int | None +) -> dict: + """Create a summary record for JSONL output.""" + return { + "type": "summary", + "schema_version": 1, + "tool": "autorepro", + "runs": runs, + "successes": successes, + "first_success_index": first_success_index, + } + + +def _execute_multiple_commands( # noqa: C901, PLR0912 + suggestions: list, + selected_indices: list[int], + repo_path: Path | None, + config: ExecConfig, +) -> int: + """ + Execute multiple commands and handle JSONL output. + + Returns: + Final exit code (0 if any command succeeded, otherwise last exit code) + """ + log = logging.getLogger("autorepro") + + # Prepare environment variables + env, error = _prepare_exec_environment(config) + if error is not None: + return error + assert env is not None + + # Determine execution directory + exec_dir = repo_path if repo_path else Path.cwd() + + # Track execution results + runs = 0 + successes = 0 + first_success_index = None + last_exit_code = 0 + + # Execute selected commands + for _i, suggestion_index in enumerate(selected_indices): + command_str, score, rationale = suggestions[suggestion_index] + + log.info(f"Executing command {suggestion_index}: {command_str}") + + # Record start time + start_time = datetime.now() + + # Execute the command + results, error = _execute_command(command_str, env, exec_dir, config) + if error is not None: + return error + + # Record end time + end_time = datetime.now() + + runs += 1 + exit_code = results["exit_code"] + last_exit_code = exit_code + + # Track success + if exit_code == 0: + successes += 1 + if first_success_index is None: + first_success_index = suggestion_index + + # Write JSONL record if requested + if config.jsonl_path: + run_record = _create_run_record( + suggestion_index, command_str, results, start_time, end_time + ) + _write_jsonl_record(config.jsonl_path, run_record) + + # Handle output logging (for --tee only, not JSONL in multi-execution mode) + if config.tee_path: + _handle_exec_output_logging(results, config) + + # Print output to console (unless quiet) + if results["stdout_full"]: + print(results["stdout_full"], end="") + if results["stderr_full"]: + print(results["stderr_full"], file=sys.stderr, end="") + + # Check for early stopping + if config.until_success and exit_code == 0: + log.info(f"Stopping after first success (command {suggestion_index})") + break + + # Write summary record + summary_record = _create_summary_record(runs, successes, first_success_index) + + if config.jsonl_path: + _write_jsonl_record(config.jsonl_path, summary_record) + + if config.summary_path: + try: + with open(config.summary_path, "w", encoding="utf-8") as f: + json.dump(summary_record, f, indent=2) + except OSError as e: + log.error(f"Failed to write summary file: {e}") + return 1 + + # Return 0 if any command succeeded, otherwise last exit code + return 0 if successes > 0 else last_exit_code + + def cmd_exec(config: ExecConfig | None = None, **kwargs) -> int: """Handle the exec command.""" # Support backward compatibility with individual parameters @@ -1686,6 +1979,10 @@ def cmd_exec(config: ExecConfig | None = None, **kwargs) -> int: dry_run=kwargs.get("dry_run", False), min_score=kwargs.get("min_score", global_config.limits.min_score_threshold), strict=kwargs.get("strict", False), + all=kwargs.get("all", False), + indexes=kwargs.get("indexes"), + until_success=kwargs.get("until_success", False), + summary_path=kwargs.get("summary_path"), ) try: @@ -2032,6 +2329,10 @@ def _dispatch_exec_command(args) -> int: dry_run=args.dry_run, min_score=effective_min, strict=effective_strict, + all=getattr(args, "all", False), + indexes=getattr(args, "indexes", None), + until_success=getattr(args, "until_success", False), + summary_path=getattr(args, "summary", None), ) diff --git a/runs.jsonl b/runs.jsonl deleted file mode 100644 index 444daae..0000000 --- a/runs.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"schema_version": 1, "tool": "autorepro", "tool_version": "0.0.1", "cmd": "python -c \"import time, sys; time.sleep(3); sys.exit(0)\"", "index": 0, "cwd": "/Users/ali/autorepro", "start": "2025-09-01T10:35:50Z", "duration_ms": 1004, "exit_code": 124, "timed_out": true, "stdout_preview": "", "stderr_preview": "Command timed out after 1 seconds"} diff --git a/tests/test_exec_cli.py b/tests/test_exec_cli.py index 8827135..b6f37ed 100644 --- a/tests/test_exec_cli.py +++ b/tests/test_exec_cli.py @@ -113,18 +113,28 @@ def test_exec_with_python_repo_executes_command(self): # Should have created JSONL log assert jsonl_path.exists() - # Parse JSONL record + # Parse JSONL records (multi-execution format) with open(jsonl_path) as f: - record = json.loads(f.read().strip()) - - assert record["schema_version"] == 1 - assert record["tool"] == "autorepro" - assert "cmd" in record - assert "index" in record - assert "cwd" in record - assert "start" in record - assert "duration_ms" in record - assert "exit_code" in record + lines = f.read().strip().split("\n") + + # Should have at least a run record and summary record + assert len(lines) >= 2 + + # Parse the first line (run record) + run_record = json.loads(lines[0]) + assert run_record["type"] == "run" + assert "cmd" in run_record + assert "index" in run_record + assert "start_ts" in run_record + assert "end_ts" in run_record + assert "duration_ms" in run_record + assert "exit_code" in run_record + + # Parse the last line (summary record) + summary_record = json.loads(lines[-1]) + assert summary_record["type"] == "summary" + assert summary_record["schema_version"] == 1 + assert summary_record["tool"] == "autorepro" def test_exec_index_selects_command(self): """Test --index selects the N-th command.""" @@ -166,8 +176,11 @@ def test_exec_index_selects_command(self): # Check JSONL record has correct index if jsonl_path.exists(): with open(jsonl_path) as f: - record = json.loads(f.read().strip()) - assert record["index"] == 1 + lines = f.read().strip().split("\n") + # Parse the first line (run record) + run_record = json.loads(lines[0]) + assert run_record["type"] == "run" + assert run_record["index"] == 1 def test_exec_strict_empty_exits_with_error(self): """Test --strict with no commands exits 1.""" diff --git a/tests/test_exec_multi.py b/tests/test_exec_multi.py new file mode 100644 index 0000000..c762148 --- /dev/null +++ b/tests/test_exec_multi.py @@ -0,0 +1,533 @@ +"""Tests for multi-execution functionality in exec command.""" + +import json +import tempfile +from datetime import datetime +from pathlib import Path +from unittest.mock import patch + +import pytest + +from autorepro.cli import ( + ExecConfig, + _create_run_record, + _create_summary_record, + _resolve_command_selection, +) +from autorepro.config.exceptions import FieldValidationError + + +class TestExecConfigValidation: + """Test validation of multi-execution fields in ExecConfig.""" + + def test_valid_multi_execution_config(self): + """Test valid multi-execution configurations.""" + # Valid --all + config = ExecConfig(desc="test", all=True) + config.validate() # Should not raise + + # Valid --indexes + config = ExecConfig(desc="test", indexes="0,2-4") + config.validate() # Should not raise + + # Valid --until-success + config = ExecConfig(desc="test", until_success=True) + config.validate() # Should not raise + + def test_indexes_parsing_valid_formats(self): + """Test parsing of valid indexes formats.""" + config = ExecConfig(desc="test", indexes="0") + assert config._parse_indexes("0") == [0] + + config = ExecConfig(desc="test", indexes="0,2,4") + assert config._parse_indexes("0,2,4") == [0, 2, 4] + + config = ExecConfig(desc="test", indexes="0-3") + assert config._parse_indexes("0-3") == [0, 1, 2, 3] + + config = ExecConfig(desc="test", indexes="0,2-4,6") + assert config._parse_indexes("0,2-4,6") == [0, 2, 3, 4, 6] + + # Test duplicate removal and sorting + config = ExecConfig(desc="test", indexes="3,1,2,1") + assert config._parse_indexes("3,1,2,1") == [1, 2, 3] + + def test_indexes_parsing_invalid_formats(self): + """Test parsing of invalid indexes formats.""" + config = ExecConfig(desc="test", indexes="invalid") + + # Empty string + with pytest.raises(FieldValidationError, match="invalid indexes format"): + config.validate() + + # Invalid index + config.indexes = "abc" + with pytest.raises(FieldValidationError, match="invalid indexes format"): + config.validate() + + # Negative index + config.indexes = "-1" + with pytest.raises(FieldValidationError, match="invalid indexes format"): + config.validate() + + # Invalid range + config.indexes = "3-1" # start > end + with pytest.raises(FieldValidationError, match="invalid indexes format"): + config.validate() + + # Invalid range format + config.indexes = "1-2-3" + with pytest.raises(FieldValidationError, match="invalid indexes format"): + config.validate() + + +class TestCommandSelection: + """Test command selection logic for multi-execution.""" + + def test_resolve_command_selection_single_index(self): + """Test single index selection (default behavior).""" + suggestions = [ + ("cmd1", 5, "rationale1"), + ("cmd2", 4, "rationale2"), + ("cmd3", 3, "rationale3"), + ] + config = ExecConfig(desc="test", index=1) + + selected_indices, error = _resolve_command_selection(suggestions, config) + + assert error is None + assert selected_indices == [1] + + def test_resolve_command_selection_all(self): + """Test --all flag selection.""" + suggestions = [ + ("cmd1", 5, "rationale1"), + ("cmd2", 4, "rationale2"), + ("cmd3", 3, "rationale3"), + ] + config = ExecConfig(desc="test", all=True) + + selected_indices, error = _resolve_command_selection(suggestions, config) + + assert error is None + assert selected_indices == [0, 1, 2] + + def test_resolve_command_selection_indexes(self): + """Test --indexes flag selection.""" + suggestions = [ + ("cmd1", 5, "rationale1"), + ("cmd2", 4, "rationale2"), + ("cmd3", 3, "rationale3"), + ] + config = ExecConfig(desc="test", indexes="0,2") + + selected_indices, error = _resolve_command_selection(suggestions, config) + + assert error is None + assert selected_indices == [0, 2] + + def test_resolve_command_selection_indexes_precedence(self): + """Test that --indexes takes precedence over --all.""" + suggestions = [ + ("cmd1", 5, "rationale1"), + ("cmd2", 4, "rationale2"), + ("cmd3", 3, "rationale3"), + ] + config = ExecConfig(desc="test", all=True, indexes="1") + + selected_indices, error = _resolve_command_selection(suggestions, config) + + assert error is None + assert selected_indices == [1] # indexes takes precedence + + def test_resolve_command_selection_index_out_of_range(self): + """Test error handling for out-of-range indices.""" + suggestions = [("cmd1", 5, "rationale1"), ("cmd2", 4, "rationale2")] + config = ExecConfig(desc="test", index=5) + + selected_indices, error = _resolve_command_selection(suggestions, config) + + assert error == 2 + assert selected_indices is None + + def test_resolve_command_selection_indexes_out_of_range(self): + """Test error handling for out-of-range indexes in --indexes.""" + suggestions = [("cmd1", 5, "rationale1"), ("cmd2", 4, "rationale2")] + config = ExecConfig(desc="test", indexes="0,5") + + selected_indices, error = _resolve_command_selection(suggestions, config) + + assert error == 2 + assert selected_indices is None + + def test_resolve_command_selection_empty_suggestions(self): + """Test handling of empty suggestions list.""" + suggestions = [] + config = ExecConfig(desc="test", all=True) + + selected_indices, error = _resolve_command_selection(suggestions, config) + + assert error is None + assert selected_indices == [] + + +class TestJSONLRecords: + """Test JSONL record creation functions.""" + + def test_create_run_record(self): + """Test creation of run records.""" + results = { + "exit_code": 0, + "duration_ms": 1234, + "stdout_path": "/tmp/stdout.txt", + "stderr_path": "/tmp/stderr.txt", + } + start_time = datetime(2025, 9, 13, 12, 0, 0) + end_time = datetime(2025, 9, 13, 12, 0, 1) + + record = _create_run_record(2, "test command", results, start_time, end_time) + + assert record["type"] == "run" + assert record["index"] == 2 + assert record["cmd"] == "test command" + assert record["start_ts"] == "2025-09-13T12:00:00Z" + assert record["end_ts"] == "2025-09-13T12:00:01Z" + assert record["exit_code"] == 0 + assert record["duration_ms"] == 1234 + assert record["stdout_path"] == "/tmp/stdout.txt" + assert record["stderr_path"] == "/tmp/stderr.txt" + + def test_create_run_record_minimal(self): + """Test creation of run records with minimal fields.""" + results = { + "exit_code": 1, + "duration_ms": 500, + } + start_time = datetime(2025, 9, 13, 12, 0, 0) + end_time = datetime(2025, 9, 13, 12, 0, 1) + + record = _create_run_record(0, "test command", results, start_time, end_time) + + assert record["type"] == "run" + assert record["index"] == 0 + assert record["cmd"] == "test command" + assert record["exit_code"] == 1 + assert record["duration_ms"] == 500 + assert "stdout_path" not in record + assert "stderr_path" not in record + + def test_create_summary_record(self): + """Test creation of summary records.""" + record = _create_summary_record(5, 2, 1) + + assert record["type"] == "summary" + assert record["schema_version"] == 1 + assert record["tool"] == "autorepro" + assert record["runs"] == 5 + assert record["successes"] == 2 + assert record["first_success_index"] == 1 + + def test_create_summary_record_no_successes(self): + """Test creation of summary records with no successes.""" + record = _create_summary_record(3, 0, None) + + assert record["type"] == "summary" + assert record["runs"] == 3 + assert record["successes"] == 0 + assert record["first_success_index"] is None + + +class TestMultiExecutionIntegration: + """Integration tests for multi-execution functionality.""" + + @patch("autorepro.cli._handle_exec_output_logging") + @patch("autorepro.cli._execute_command") + @patch("autorepro.cli._prepare_exec_environment") + def test_multi_execution_all_commands(self, mock_env, mock_execute, mock_logging): + """Test executing all commands with --all flag.""" + from autorepro.cli import _execute_multiple_commands + + # Mock environment setup + mock_env.return_value = ({"PATH": "/usr/bin"}, None) + + # Mock command executions + mock_execute.side_effect = [ + ( + { + "exit_code": 1, + "duration_ms": 100, + "stdout_full": "", + "stderr_full": "", + }, + None, + ), + ( + { + "exit_code": 0, + "duration_ms": 200, + "stdout_full": "success", + "stderr_full": "", + }, + None, + ), + ( + { + "exit_code": 2, + "duration_ms": 150, + "stdout_full": "", + "stderr_full": "error", + }, + None, + ), + ] + + suggestions = [ + ("python3 -c 'import sys; sys.exit(1)'", 9, "rationale1"), + ("python3 -c 'import sys; sys.exit(0)'", 8, "rationale2"), + ("python3 -c 'import sys; sys.exit(2)'", 7, "rationale3"), + ] + selected_indices = [0, 1, 2] + + with tempfile.TemporaryDirectory() as tmpdir: + jsonl_path = Path(tmpdir) / "runs.jsonl" + summary_path = Path(tmpdir) / "summary.json" + config = ExecConfig( + desc="test", + all=True, + jsonl_path=str(jsonl_path), + summary_path=str(summary_path), + ) + + exit_code = _execute_multiple_commands( + suggestions, selected_indices, None, config + ) + + # Should return 0 because one command succeeded + assert exit_code == 0 + + # Check JSONL output + assert jsonl_path.exists() + lines = jsonl_path.read_text().strip().split("\n") + assert len(lines) == 4 # 3 run records + 1 summary record + + # Check run records + run1 = json.loads(lines[0]) + assert run1["type"] == "run" + assert run1["index"] == 0 + assert run1["exit_code"] == 1 + + run2 = json.loads(lines[1]) + assert run2["type"] == "run" + assert run2["index"] == 1 + assert run2["exit_code"] == 0 + + run3 = json.loads(lines[2]) + assert run3["type"] == "run" + assert run3["index"] == 2 + assert run3["exit_code"] == 2 + + # Check summary record + summary = json.loads(lines[3]) + assert summary["type"] == "summary" + assert summary["runs"] == 3 + assert summary["successes"] == 1 + assert summary["first_success_index"] == 1 + + # Check summary file + assert summary_path.exists() + summary_file = json.loads(summary_path.read_text()) + assert summary_file == summary + + @patch("autorepro.cli._handle_exec_output_logging") + @patch("autorepro.cli._execute_command") + @patch("autorepro.cli._prepare_exec_environment") + def test_multi_execution_until_success(self, mock_env, mock_execute, mock_logging): + """Test executing commands with --until-success flag.""" + from autorepro.cli import _execute_multiple_commands + + # Mock environment setup + mock_env.return_value = ({"PATH": "/usr/bin"}, None) + + # Mock command executions - first fails, second succeeds + mock_execute.side_effect = [ + ( + { + "exit_code": 1, + "duration_ms": 100, + "stdout_full": "", + "stderr_full": "", + }, + None, + ), + ( + { + "exit_code": 0, + "duration_ms": 200, + "stdout_full": "success", + "stderr_full": "", + }, + None, + ), + ] + + suggestions = [ + ("python3 -c 'import sys; sys.exit(1)'", 9, "rationale1"), + ("python3 -c 'import sys; sys.exit(0)'", 8, "rationale2"), + ("python3 -c 'import sys; sys.exit(2)'", 7, "rationale3"), + ] + selected_indices = [0, 1, 2] # All three, but should stop after second + + with tempfile.TemporaryDirectory() as tmpdir: + jsonl_path = Path(tmpdir) / "runs.jsonl" + config = ExecConfig( + desc="test", all=True, until_success=True, jsonl_path=str(jsonl_path) + ) + + exit_code = _execute_multiple_commands( + suggestions, selected_indices, None, config + ) + + # Should return 0 because a command succeeded + assert exit_code == 0 + + # Check JSONL output - should only have 2 run records + 1 summary + lines = jsonl_path.read_text().strip().split("\n") + assert len(lines) == 3 # 2 run records + 1 summary record + + # Check that execution stopped after success + summary = json.loads(lines[2]) + assert summary["runs"] == 2 + assert summary["successes"] == 1 + assert summary["first_success_index"] == 1 + + @patch("autorepro.cli._handle_exec_output_logging") + @patch("autorepro.cli._execute_command") + @patch("autorepro.cli._prepare_exec_environment") + def test_multi_execution_indexes(self, mock_env, mock_execute, mock_logging): + """Test executing specific commands with --indexes flag.""" + from autorepro.cli import _execute_multiple_commands + + # Mock environment setup + mock_env.return_value = ({"PATH": "/usr/bin"}, None) + + # Mock command executions + mock_execute.side_effect = [ + ( + { + "exit_code": 0, + "duration_ms": 100, + "stdout_full": "success", + "stderr_full": "", + }, + None, + ), + ( + { + "exit_code": 2, + "duration_ms": 150, + "stdout_full": "", + "stderr_full": "error", + }, + None, + ), + ] + + suggestions = [ + ("python3 -c 'import sys; sys.exit(1)'", 9, "rationale1"), + ("python3 -c 'import sys; sys.exit(0)'", 8, "rationale2"), + ("python3 -c 'import sys; sys.exit(2)'", 7, "rationale3"), + ] + selected_indices = [1, 2] # Only execute commands at indices 1 and 2 + + with tempfile.TemporaryDirectory() as tmpdir: + jsonl_path = Path(tmpdir) / "runs.jsonl" + config = ExecConfig(desc="test", indexes="1,2", jsonl_path=str(jsonl_path)) + + exit_code = _execute_multiple_commands( + suggestions, selected_indices, None, config + ) + + # Should return 0 because first selected command succeeded + assert exit_code == 0 + + # Check JSONL output + lines = jsonl_path.read_text().strip().split("\n") + assert len(lines) == 3 # 2 run records + 1 summary record + + # Check that correct commands were executed + run1 = json.loads(lines[0]) + assert run1["index"] == 1 + assert run1["exit_code"] == 0 + + run2 = json.loads(lines[1]) + assert run2["index"] == 2 + assert run2["exit_code"] == 2 + + summary = json.loads(lines[2]) + assert summary["runs"] == 2 + assert summary["successes"] == 1 + assert summary["first_success_index"] == 1 + + +class TestBackwardCompatibility: + """Test that multi-execution doesn't break existing functionality.""" + + def test_single_command_execution_unchanged(self): + """Test that single command execution behavior is unchanged.""" + from autorepro.cli import _execute_exec_pipeline + + with patch("autorepro.cli._validate_exec_repo_path") as mock_repo: + with patch("autorepro.cli._read_exec_input_text") as mock_text: + with patch( + "autorepro.cli._generate_exec_suggestions" + ) as mock_suggestions: + with patch("autorepro.cli._execute_exec_command_real") as mock_exec: + # Mock the pipeline components + mock_repo.return_value = (Path("/tmp"), None) + mock_text.return_value = ("test description", None) + mock_suggestions.return_value = ( + [("cmd1", 5, "rationale")], + None, + ) + mock_exec.return_value = 0 + + config = ExecConfig( + desc="test", index=0 + ) # No multi-execution flags + + exit_code = _execute_exec_pipeline(config) + + assert exit_code == 0 + # Should use single command execution path + mock_exec.assert_called_once() + + def test_jsonl_triggers_multi_execution_path(self): + """Test that --jsonl triggers multi-execution path even for single command.""" + from autorepro.cli import _execute_exec_pipeline + + with patch("autorepro.cli._validate_exec_repo_path") as mock_repo: + with patch("autorepro.cli._read_exec_input_text") as mock_text: + with patch( + "autorepro.cli._generate_exec_suggestions" + ) as mock_suggestions: + with patch( + "autorepro.cli._execute_multiple_commands" + ) as mock_multi_exec: + # Mock the pipeline components + mock_repo.return_value = (Path("/tmp"), None) + mock_text.return_value = ("test description", None) + mock_suggestions.return_value = ( + [("cmd1", 5, "rationale")], + None, + ) + mock_multi_exec.return_value = 0 + + config = ExecConfig( + desc="test", index=0, jsonl_path="runs.jsonl" + ) + + exit_code = _execute_exec_pipeline(config) + + assert exit_code == 0 + # Should use multi-execution path because of JSONL + mock_multi_exec.assert_called_once()