From e7602ea6e82d2ca9e4b33079144c954e559dd1f3 Mon Sep 17 00:00:00 2001
From: Ben Lovell
Date: Mon, 9 Feb 2026 23:14:57 +0100
Subject: [PATCH 1/7] fix(mock): use RFC 7807 format for 422 validation
 response

Co-Authored-By: Claude Opus 4.6
---
 tests/mock-api-server/main.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/tests/mock-api-server/main.py b/tests/mock-api-server/main.py
index 3f058c9f..2b9ebdc0 100644
--- a/tests/mock-api-server/main.py
+++ b/tests/mock-api-server/main.py
@@ -11,7 +11,7 @@
 """
 
 from fastapi import FastAPI, HTTPException, Response, Request
-from fastapi.responses import StreamingResponse
+from fastapi.responses import JSONResponse, StreamingResponse
 from pydantic import BaseModel
 from typing import List, Dict, Any, Optional
 import os
@@ -247,13 +247,20 @@ async def run_app(name: str, run_params: Dict[str, Any]):
     parameters = run_params.get("parameters", {})
 
     if "nonexistent_param" in parameters:
-        raise HTTPException(
+        return JSONResponse(
             status_code=422,
-            detail={
-                "detail": "Validation error",
-                "status": 422,
+            content={
+                "$schema": "http://localhost:8081/v1/schemas/ErrorModel.json",
                 "title": "Unprocessable Entity",
-                "errors": [{"message": "Unknown parameter"}],
+                "status": 422,
+                "detail": "Validation error",
+                "errors": [
+                    {
+                        "message": "Unknown parameter",
+                        "location": "body.parameters",
+                        "value": parameters,
+                    }
+                ],
             },
         )

From 29e62cfbe20c022356a59178b0c5c8fb7a25944c Mon Sep 17 00:00:00 2001
From: Ben Lovell
Date: Mon, 9 Feb 2026 23:15:36 +0100
Subject: [PATCH 2/7] fix(mock): match real server warning event and log
 stream format

Co-Authored-By: Claude Opus 4.6
---
 tests/mock-api-server/main.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/tests/mock-api-server/main.py b/tests/mock-api-server/main.py
index 2b9ebdc0..f504cc8b 100644
--- a/tests/mock-api-server/main.py
+++ b/tests/mock-api-server/main.py
@@ -537,7 +537,7 @@ def make_log_event(seq: int, line_num: int, content: str, timestamp: str):
 
 
 def make_warning_event(content: str, timestamp: str):
-    data = {"data": {"content": content, "reported_at": timestamp}, "event": "warning"}
+    data = {"content": content, "reported_at": timestamp}
     return f"event: warning\ndata: {json.dumps(data)}\n\n"
 
 
@@ -556,25 +556,31 @@ async def describe_run_logs(name: str, seq: int):
 
 
 async def generate_logs_after_completion_test_stream(seq: int):
-    """Log before run completion, then log after.
-
-    Timeline: Run completes at 1 second, second log sent at 1.5 seconds.
-    """
+    """Emit realistic runner logs then close, matching real server behavior."""
+    yield make_log_event(seq, 1, "Using CPython 3.12.9", "2025-08-22T12:00:00Z")
     yield make_log_event(
-        seq, 1, "First log before run completes", "2025-08-22T12:00:00Z"
+        seq, 2, "Creating virtual environment at: .venv", "2025-08-22T12:00:00Z"
     )
-    await asyncio.sleep(1.5)
+    await asyncio.sleep(0.5)
     yield make_log_event(
-        seq, 2, "Second log after run completes", "2025-08-22T12:00:01Z"
+        seq, 3, "Activate with: source .venv/bin/activate", "2025-08-22T12:00:01Z"
     )
+    yield make_log_event(seq, 4, "Hello, World!", "2025-08-22T12:00:01Z")
 
 
 async def generate_warning_log_stream(seq: int):
-    """Stream a warning and a couple of logs, then finish."""
-    yield make_warning_event("Rate limit approaching", "2025-08-22T12:00:00Z")
-    yield make_log_event(seq, 1, "Warning stream log 1", "2025-08-22T12:00:00Z")
-    await asyncio.sleep(1.2)
-    yield make_log_event(seq, 2, "Warning stream log 2", "2025-08-22T12:00:01Z")
+    """Stream logs then emit warning before closing, matching real server behavior."""
+    yield make_log_event(seq, 1, "Using CPython 3.12.9", "2025-08-22T12:00:00Z")
+    yield make_log_event(
+        seq, 2, "Creating virtual environment at: .venv", "2025-08-22T12:00:00Z"
+    )
+    await asyncio.sleep(0.5)
+    yield make_log_event(
+        seq, 3, "Activate with: source .venv/bin/activate", "2025-08-22T12:00:00Z"
+    )
+    yield make_log_event(seq, 4, "Hello, World!", "2025-08-22T12:00:01Z")
+    await asyncio.sleep(0.5)
+    yield make_warning_event("No new logs available", "2025-08-22T12:00:02Z")
 
 
 async def generate_normal_log_stream(seq: int):

From 6f0b70084c9ea04bc2731dc7e34b5f8cbb17344e Mon Sep 17 00:00:00 2001
From: Ben Lovell
Date: Mon, 9 Feb 2026 23:18:21 +0100
Subject: [PATCH 3/7] fix(mock): add name field to schedule update endpoint

Co-Authored-By: Claude Opus 4.6
---
 tests/mock-api-server/main.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/mock-api-server/main.py b/tests/mock-api-server/main.py
index f504cc8b..11f2e2ba 100644
--- a/tests/mock-api-server/main.py
+++ b/tests/mock-api-server/main.py
@@ -657,6 +657,8 @@ async def update_schedule(id_or_name: str, schedule_data: Dict[str, Any]):
     schedule = mock_schedules_db[id_or_name]
     if "cron" in schedule_data:
         schedule["cron"] = schedule_data["cron"]
+    if "name" in schedule_data:
+        schedule["name"] = schedule_data["name"]
     if "parameters" in schedule_data:
         schedule["parameters"] = schedule_data["parameters"]
     schedule["updated_at"] = now_iso()

From 8dc4959d7cce8266cf0a824d8d952a5ca1e4a474 Mon Sep 17 00:00:00 2001
From: Ben Lovell
Date: Mon, 9 Feb 2026 23:20:17 +0100
Subject: [PATCH 4/7] test: update BDD assertions to match real server
 responses

Co-Authored-By: Claude Opus 4.6
---
 tests/integration/features/cli_runs.feature           | 10 ++++------
 tests/integration/features/mcp_app_management.feature |  2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/tests/integration/features/cli_runs.feature b/tests/integration/features/cli_runs.feature
index ede7b436..4df64d6e 100644
--- a/tests/integration/features/cli_runs.feature
+++ b/tests/integration/features/cli_runs.feature
@@ -19,7 +19,7 @@ Feature: CLI Run Commands
     Given I have a valid Towerfile in the current directory
     When I run "tower deploy --create" via CLI
     Then I run "tower run -p nonexistent_param=test" via CLI
-    Then the output should show "API Error:"
+    Then the output should show "Error details:"
     And the output should show "Validation error"
     And the output should show "Unknown parameter"
     And the output should not just show "422"
@@ -36,20 +36,18 @@ Feature: CLI Run Commands
     Given I have a simple hello world application named "app-logs-after-completion"
     When I run "tower deploy --create" via CLI
     And I run "tower run" via CLI
-    Then the output should show "First log before run completes"
-    And the output should show "Second log after run completes"
+    Then the output should show "Hello, World!"
 
   Scenario: CLI apps logs follow should stream logs and drain after completion
     Given I have a simple hello world application named "app-logs-after-completion"
     When I run "tower deploy --create" via CLI
    And I run "tower run --detached" via CLI and capture run number
     And I run "tower apps logs --follow {app_name}#{run_number}" via CLI using created app name and run number
-    Then the output should show "First log before run completes"
-    And the output should show "Second log after run completes"
+    Then the output should show "Hello, World!"
 
   Scenario: CLI apps logs follow should display warnings
     Given I have a simple hello world application named "app-logs-warning"
     When I run "tower deploy --create" via CLI
     And I run "tower run --detached" via CLI and capture run number
     And I run "tower apps logs --follow {app_name}#{run_number}" via CLI using created app name and run number
-    Then the output should show "Warning: Rate limit approaching"
+    Then the output should show "Warning: No new logs available"
diff --git a/tests/integration/features/mcp_app_management.feature b/tests/integration/features/mcp_app_management.feature
index 39899bd3..ccae5790 100644
--- a/tests/integration/features/mcp_app_management.feature
+++ b/tests/integration/features/mcp_app_management.feature
@@ -100,7 +100,7 @@ Feature: MCP App Management
     When I call tower_deploy via MCP
     Then I call tower_run_remote with invalid parameter "nonexistent_param=test"
     Then I should receive a detailed validation error
-    And the error should mention "Unknown parameter"
+    And the error should mention "Validation error"
     And the error should not just be a status code
 
   Scenario: Local run should detect exit code failures

From 3da6a90479b6b985ddb6a2c5c5b84bdb1d918d7b Mon Sep 17 00:00:00 2001
From: Ben Lovell
Date: Mon, 9 Feb 2026 23:21:24 +0100
Subject: [PATCH 5/7] feat(test-runner): support running tests against
 external server

Co-Authored-By: Claude Opus 4.6
---
 tests/integration/features/steps/cli_steps.py | 26 ++++--
 tests/integration/run_tests.py                | 83 ++++++++++++++-----
 2 files changed, 82 insertions(+), 27 deletions(-)

diff --git a/tests/integration/features/steps/cli_steps.py b/tests/integration/features/steps/cli_steps.py
index afd2997e..8b01e354 100644
--- a/tests/integration/features/steps/cli_steps.py
+++ b/tests/integration/features/steps/cli_steps.py
@@ -30,7 +30,11 @@ def step_run_cli_command(context, command):
     test_env = os.environ.copy()
     test_env["FORCE_COLOR"] = "1"  # Force colored output
     test_env["CLICOLOR_FORCE"] = "1"  # Force colored output
-    test_env["TOWER_URL"] = context.tower_url  # Use mock API
+    test_env["TOWER_URL"] = context.tower_url  # Use configured API URL
+
+    # Only set mock JWT if not already configured externally
+    if "TOWER_JWT" not in os.environ:
+        test_env["TOWER_JWT"] = "mock_jwt_token"
 
     # Override HOME to use test session (which contains auth credentials)
     test_home = Path(__file__).parent.parent.parent / "test-home"
@@ -44,9 +48,11 @@ def step_run_cli_command(context, command):
             env=test_env,
         )
         context.cli_output = result.stdout + result.stderr
+        context.cli_stdout = result.stdout
         context.cli_return_code = result.returncode
     except subprocess.TimeoutExpired:
         context.cli_output = "Command timed out"
+        context.cli_stdout = ""
         context.cli_return_code = 124
     except Exception as e:
         print(f"DEBUG: Exception in CLI command: {type(e).__name__}: {e}")
@@ -266,11 +272,17 @@ def step_table_should_show_columns(context, column_list):
         assert column in output, f"Expected column '{column}' in table, got: {output}"
 
 
+def parse_cli_json(context):
+    """Parse JSON from CLI stdout (excludes stderr)."""
+    raw = getattr(context, "cli_stdout", context.cli_output)
+    return json.loads(raw)
+
+
 @step("the output should be valid JSON")
 def step_output_should_be_valid_json(context):
     """Verify output is valid JSON"""
     try:
-        json.loads(context.cli_output)
+        parse_cli_json(context)
     except json.JSONDecodeError as e:
         raise AssertionError(
             f"Output is not valid JSON: {e}\nOutput: {context.cli_output}"
@@ -280,7 +292,7 @@ def step_output_should_be_valid_json(context):
 @step("the JSON should contain app information")
 def step_json_should_contain_app_info(context):
     """Verify JSON contains app-related information"""
-    data = json.loads(context.cli_output)
+    data = parse_cli_json(context)
     assert (
         "app" in data or "name" in data
     ), f"Expected app information in JSON, got: {data}"
@@ -289,7 +301,7 @@ def step_json_should_contain_app_info(context):
 @step("the JSON should contain runs array")
 def step_json_should_contain_runs_array(context):
     """Verify JSON contains runs array"""
-    data = json.loads(context.cli_output)
+    data = parse_cli_json(context)
     assert "runs" in data and isinstance(
         data["runs"], list
     ), f"Expected runs array in JSON, got: {data}"
@@ -298,7 +310,7 @@ def step_json_should_contain_runs_array(context):
 @step("the JSON should contain the created app information")
 def step_json_should_contain_created_app_info(context):
     """Verify JSON contains created app information"""
-    data = json.loads(context.cli_output)
+    data = parse_cli_json(context)
 
     expected = IsPartialDict(
         result="success",
@@ -318,7 +330,7 @@ def step_json_should_contain_created_app_info(context):
 @step('the app name should be "{expected_name}"')
 def step_app_name_should_be(context, expected_name):
     """Verify app name matches expected value"""
-    data = json.loads(context.cli_output)
+    data = parse_cli_json(context)
     # Extract app name from response structure
     if "app" in data and "name" in data["app"]:
         actual_name = data["app"]["name"]
@@ -337,7 +349,7 @@ def step_app_name_should_be(context, expected_name):
 @step('the app description should be "{expected_description}"')
 def step_app_description_should_be(context, expected_description):
     """Verify app description matches expected value"""
-    data = json.loads(context.cli_output)
+    data = parse_cli_json(context)
 
     candidates = []
     if "app" in data:
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index b9612e62..6ce8b8bb 100755
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -2,6 +2,12 @@
 """
 Simple test runner for Tower MCP integration tests.
 Assumes dependencies are already installed via nix devShell.
+
+Supports two modes:
+1. Mock mode (default): Starts local mock server at http://127.0.0.1:8000
+2. Real server mode: Set TOWER_URL env var to use an external server
+   - Skips mock server startup
+   - Preserves externally configured session.json
 """
 
 import os
@@ -18,12 +24,41 @@ def log(msg):
     print(f"\033[36m[test-runner]\033[0m {msg}")
 
 
+def is_session_externally_configured(test_home):
+    """Check if session.json has been externally configured.
+
+    Returns True if session.json is newer than the git HEAD version,
+    indicating it was set up by external orchestration (e.g., from a monorepo).
+    """
+    session_file = test_home / ".config" / "tower" / "session.json"
+    if not session_file.exists():
+        return False
+
+    try:
+        result = subprocess.run(
+            ["git", "log", "-1", "--format=%ct", str(session_file)],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        git_mtime = int(result.stdout.strip())
+        file_mtime = int(session_file.stat().st_mtime)
+        return file_mtime > git_mtime
+    except (subprocess.CalledProcessError, ValueError, FileNotFoundError):
+        return False
+
+
 def reset_session_fixture(test_home):
     """Reset the session.json fixture to its committed state before tests.
 
     The CLI may modify session.json during MCP operations (like team switching),
     so we restore it to the canonical committed version before each test run.
+
+    Skips reset if session appears to be externally configured.
     """
+    if is_session_externally_configured(test_home):
+        log("Skipping session.json reset (externally configured)")
+        return
+
     session_file = test_home / ".config" / "tower" / "session.json"
     subprocess.run(
         ["git", "checkout", str(session_file)],
@@ -96,32 +131,40 @@ def main():
 
     # Set up environment
     env = os.environ.copy()
-    if "TOWER_URL" not in env:
-        env["TOWER_URL"] = "http://127.0.0.1:8000"
 
     # Set HOME to test-home directory to isolate session from user's real config
     test_home = Path(__file__).parent / "test-home"
     env["HOME"] = str(test_home.absolute())
 
-    log(f"Using API URL: \033[1m{env['TOWER_URL']}\033[0m")
-    log(f"Using test HOME: \033[1m{env['HOME']}\033[0m")
-
-    # Ensure mock server is running
-    mock_process = None
-    if not check_mock_server_health(env["TOWER_URL"]):
-        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        port_in_use = sock.connect_ex(("127.0.0.1", 8000)) == 0
-        sock.close()
-
-        if port_in_use:
-            log(
-                "ERROR: Port 8000 is in use but not responding to health check (some unrelated server?)."
-            )
-            return 1
-
-        mock_process = start_mock_server()
+    # Determine if we're using external configuration or mock server
+    tower_url_preset = "TOWER_URL" in os.environ
+    if tower_url_preset:
+        server_url = env["TOWER_URL"]
+        mock_process = None
+        log(f"Using externally configured API URL: \033[1m{server_url}\033[0m")
     else:
-        log("Mock server already running and healthy")
+        server_url = "http://127.0.0.1:8000"
+        env["TOWER_URL"] = server_url
+        log(f"Using mock server API URL: \033[1m{server_url}\033[0m")
+
+        # Ensure mock server is running
+        mock_process = None
+        if not check_mock_server_health(server_url):
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            port_in_use = sock.connect_ex(("127.0.0.1", 8000)) == 0
+            sock.close()
+
+            if port_in_use:
+                log(
+                    "ERROR: Port 8000 is in use but not responding to health check (some unrelated server?)."
+                )
+                return 1
+
+            mock_process = start_mock_server()
+        else:
+            log("Mock server already running and healthy")
+
+    log(f"Using test HOME: \033[1m{env['HOME']}\033[0m")
 
     # Actually run tests
     try:

From 52a1e585224e1f5564ffb6d0400e973b38c2b0d0 Mon Sep 17 00:00:00 2001
From: Ben Lovell
Date: Wed, 18 Feb 2026 20:06:47 +0100
Subject: [PATCH 6/7] chore: make CLI build location configurable in tests

---
 tests/integration/features/environment.py | 4 ++++
 tests/integration/run_tests.py            | 5 +++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/integration/features/environment.py b/tests/integration/features/environment.py
index d486ea4b..efd5f664 100644
--- a/tests/integration/features/environment.py
+++ b/tests/integration/features/environment.py
@@ -45,6 +45,10 @@ def after_scenario(context, scenario):
 
 
 def _find_tower_binary():
+    if binary := os.environ.get("TOWER_CLI_BINARY"):
+        if Path(binary).exists():
+            return binary
+
     # Look for debug build first
     debug_path = (
         Path(__file__).parent.parent.parent.parent / "target" / "debug" / "tower"
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index 6ce8b8bb..595c3a59 100755
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -103,7 +103,8 @@ def start_mock_server():
 
 def main():
     """Run the integration tests."""
-    # Check prerequisites - look for tower binary from cargo build or on PATH
+    # Check prerequisites - look for tower binary from env, cargo build, or PATH
+    has_env_binary = bool(os.environ.get("TOWER_CLI_BINARY") and Path(os.environ["TOWER_CLI_BINARY"]).exists())
     project_root = Path(__file__).parent.parent.parent
     has_cargo_binary = any(
         (project_root / "target" / build / "tower").exists()
@@ -115,7 +116,7 @@ def main():
         else False
     )
 
-    if not has_cargo_binary and not has_path_binary:
+    if not has_env_binary and not has_cargo_binary and not has_path_binary:
         log(
             "ERROR: Tower binary not found. Please run 'cargo build' or 'maturin develop' first."
         )
         return 1
 
From e6f1da450fcd486df089991a13ff3a1f9a2cea6c Mon Sep 17 00:00:00 2001
From: Ben Lovell
Date: Thu, 19 Feb 2026 17:11:14 +0100
Subject: [PATCH 7/7] =?UTF-8?q?fix:=20TOWER=5FJWT=20being=20set=20makes=20?=
 =?UTF-8?q?it=20ignore=20the=20actual=20mock=20one=20in=20the=20repo=20?=
 =?UTF-8?q?=F0=9F=A4=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/integration/features/steps/cli_steps.py | 4 ----
 tests/integration/run_tests.py                | 5 ++++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/integration/features/steps/cli_steps.py b/tests/integration/features/steps/cli_steps.py
index 8b01e354..1d8151f5 100644
--- a/tests/integration/features/steps/cli_steps.py
+++ b/tests/integration/features/steps/cli_steps.py
@@ -32,10 +32,6 @@ def step_run_cli_command(context, command):
     test_env["CLICOLOR_FORCE"] = "1"  # Force colored output
     test_env["TOWER_URL"] = context.tower_url  # Use configured API URL
 
-    # Only set mock JWT if not already configured externally
-    if "TOWER_JWT" not in os.environ:
-        test_env["TOWER_JWT"] = "mock_jwt_token"
-
     # Override HOME to use test session (which contains auth credentials)
diff --git a/tests/integration/run_tests.py b/tests/integration/run_tests.py
index 595c3a59..9b1174d4 100755
--- a/tests/integration/run_tests.py
+++ b/tests/integration/run_tests.py
@@ -104,7 +104,10 @@ def start_mock_server():
 def main():
     """Run the integration tests."""
     # Check prerequisites - look for tower binary from env, cargo build, or PATH
-    has_env_binary = bool(os.environ.get("TOWER_CLI_BINARY") and Path(os.environ["TOWER_CLI_BINARY"]).exists())
+    has_env_binary = bool(
+        os.environ.get("TOWER_CLI_BINARY")
+        and Path(os.environ["TOWER_CLI_BINARY"]).exists()
+    )
     project_root = Path(__file__).parent.parent.parent
     has_cargo_binary = any(
         (project_root / "target" / build / "tower").exists()
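
Taken together, patches 5-7 let the same suite target either the bundled mock
server or a real deployment. A rough usage sketch follows; the URL and binary
path are illustrative placeholders, not values taken from the patches:

    # Mock mode (default): run_tests.py starts the local mock server itself
    python tests/integration/run_tests.py

    # External-server mode: a preset TOWER_URL skips mock-server startup
    # (patch 5), TOWER_CLI_BINARY points the steps at a prebuilt CLI
    # (patch 6), and an externally set TOWER_JWT now reaches the CLI
    # unchanged (patch 7)
    TOWER_URL=https://tower.example.test \
    TOWER_CLI_BINARY=/path/to/tower \
    python tests/integration/run_tests.py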