From d8b69ab6fe37524bf9e79364e90618ae1261df40 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 5 Mar 2026 15:03:06 +0000 Subject: [PATCH 1/6] Disable rich logging by default to fix multiprocessing deadlock Fixes #476 Root Cause: ----------- The SDK's RichHandler (from the rich library) creates locks and threads for console rendering. When used with multiprocessing.ProcessPoolExecutor with the default 'fork' start method, these locks/threads cause deadlock during concurrent image builds. Solution: --------- Set LOG_JSON=1 environment variable to use JsonFormatter instead of RichHandler. This avoids the threading issues entirely without needing to change the multiprocessing start method to 'spawn' (which could cause OOM issues). Changes: -------- 1. sitecustomize.py: Set LOG_JSON=1 early, before any SDK imports 2. All build_images.py files: Set LOG_JSON=1 as a fallback in case sitecustomize isn't loaded (e.g., in non-standard environments) Co-authored-by: openhands --- benchmarks/commit0/build_images.py | 13 +++++++++---- benchmarks/gaia/build_images.py | 10 ++++++++-- benchmarks/multiswebench/build_images.py | 13 ++++++++++--- benchmarks/swebench/build_images.py | 18 ++++++++++++------ benchmarks/swebenchmultimodal/build_images.py | 14 ++++++++++---- benchmarks/swegym/build_images.py | 12 +++++++++--- benchmarks/swesmith/build_images.py | 12 +++++++++--- benchmarks/swtbench/build_images.py | 12 +++++++++--- sitecustomize.py | 6 ++++++ 9 files changed, 82 insertions(+), 28 deletions(-) diff --git a/benchmarks/commit0/build_images.py b/benchmarks/commit0/build_images.py index 3f24567ec..67f85a6c9 100644 --- a/benchmarks/commit0/build_images.py +++ b/benchmarks/commit0/build_images.py @@ -11,15 +11,20 @@ import os import sys -from commit0.harness.constants import SPLIT -from benchmarks.commit0.config import BUILD_DEFAULTS, INFER_DEFAULTS -from benchmarks.utils.build_utils import ( +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +os.environ.setdefault("LOG_JSON", "1") + +from commit0.harness.constants import SPLIT # noqa: E402 + +from benchmarks.commit0.config import BUILD_DEFAULTS, INFER_DEFAULTS # noqa: E402 +from benchmarks.utils.build_utils import ( # noqa: E402 build_all_images, default_build_output_dir, get_build_parser, ) -from openhands.sdk import get_logger +from openhands.sdk import get_logger # noqa: E402 logger = get_logger(__name__) diff --git a/benchmarks/gaia/build_images.py b/benchmarks/gaia/build_images.py index 9f6b90cd1..5139d3c60 100644 --- a/benchmarks/gaia/build_images.py +++ b/benchmarks/gaia/build_images.py @@ -10,10 +10,16 @@ --image ghcr.io/openhands/eval-agent-server --target binary-minimal --push """ +import os import sys from pathlib import Path -from benchmarks.utils.build_utils import ( + +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +os.environ.setdefault("LOG_JSON", "1") + +from benchmarks.utils.build_utils import ( # noqa: E402 BuildOutput, _get_sdk_submodule_info, build_all_images, @@ -21,7 +27,7 @@ get_build_parser, run_docker_build_layer, ) -from openhands.sdk import get_logger +from openhands.sdk import get_logger # noqa: E402 logger = get_logger(__name__) diff --git a/benchmarks/multiswebench/build_images.py b/benchmarks/multiswebench/build_images.py index 6c3ef3b9f..c9bef4bef 100644 --- a/benchmarks/multiswebench/build_images.py +++ b/benchmarks/multiswebench/build_images.py @@ -12,13 +12,20 @@ import os from pathlib import Path -from benchmarks.multiswebench.download_dataset import download_and_concat_dataset -from benchmarks.utils.build_utils import ( + +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +os.environ.setdefault("LOG_JSON", "1") + +from benchmarks.multiswebench.download_dataset import ( + download_and_concat_dataset, # noqa: E402 +) +from benchmarks.utils.build_utils import ( # noqa: E402 build_all_images, default_build_output_dir, get_build_parser, ) -from openhands.sdk import get_logger +from openhands.sdk import get_logger # noqa: E402 logger = get_logger(__name__) diff --git a/benchmarks/swebench/build_images.py b/benchmarks/swebench/build_images.py index 5ace5419b..7a1e69b33 100644 --- a/benchmarks/swebench/build_images.py +++ b/benchmarks/swebench/build_images.py @@ -9,21 +9,27 @@ --image ghcr.io/openhands/eval-agent-server --target source-minimal """ +import os import sys from pathlib import Path -from benchmarks.swebench import constants -from benchmarks.swebench.config import BUILD_DEFAULTS -from benchmarks.utils.build_utils import ( + +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +os.environ.setdefault("LOG_JSON", "1") + +from benchmarks.swebench import constants # noqa: E402 +from benchmarks.swebench.config import BUILD_DEFAULTS # noqa: E402 +from benchmarks.utils.build_utils import ( # noqa: E402 BuildOutput, build_all_images, default_build_output_dir, get_build_parser, run_docker_build_layer, ) -from benchmarks.utils.dataset import get_dataset -from benchmarks.utils.image_utils import remote_image_exists -from openhands.sdk import get_logger +from benchmarks.utils.dataset import get_dataset # noqa: E402 +from benchmarks.utils.image_utils import remote_image_exists # noqa: E402 +from openhands.sdk import get_logger # noqa: E402 logger = get_logger(__name__) diff --git a/benchmarks/swebenchmultimodal/build_images.py b/benchmarks/swebenchmultimodal/build_images.py index 987cf7bda..993deae88 100644 --- a/benchmarks/swebenchmultimodal/build_images.py +++ b/benchmarks/swebenchmultimodal/build_images.py @@ -8,16 +8,22 @@ --image ghcr.io/openhands/eval-agent-server --target source-minimal """ +import os import sys -from benchmarks.swebenchmultimodal.config import BUILD_DEFAULTS -from benchmarks.utils.build_utils import ( + +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +os.environ.setdefault("LOG_JSON", "1") + +from benchmarks.swebenchmultimodal.config import BUILD_DEFAULTS # noqa: E402 +from benchmarks.utils.build_utils import ( # noqa: E402 build_all_images, default_build_output_dir, get_build_parser, ) -from benchmarks.utils.dataset import get_dataset -from openhands.sdk import get_logger +from benchmarks.utils.dataset import get_dataset # noqa: E402 +from openhands.sdk import get_logger # noqa: E402 logger = get_logger(__name__) diff --git a/benchmarks/swegym/build_images.py b/benchmarks/swegym/build_images.py index 6c116a020..feadc38da 100644 --- a/benchmarks/swegym/build_images.py +++ b/benchmarks/swegym/build_images.py @@ -7,15 +7,21 @@ --image ghcr.io/openhands/eval-agent-server --target source-minimal """ +import os import sys -from benchmarks.utils.build_utils import ( + +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +os.environ.setdefault("LOG_JSON", "1") + +from benchmarks.utils.build_utils import ( # noqa: E402 build_all_images, default_build_output_dir, get_build_parser, ) -from benchmarks.utils.dataset import get_dataset -from openhands.sdk import get_logger +from benchmarks.utils.dataset import get_dataset # noqa: E402 +from openhands.sdk import get_logger # noqa: E402 logger = get_logger(__name__) diff --git a/benchmarks/swesmith/build_images.py b/benchmarks/swesmith/build_images.py index 51fda90ae..8d7184fd7 100644 --- a/benchmarks/swesmith/build_images.py +++ b/benchmarks/swesmith/build_images.py @@ -8,15 +8,21 @@ --image ghcr.io/openhands/eval-agent-server --target source-minimal """ +import os import sys -from benchmarks.utils.build_utils import ( + +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +os.environ.setdefault("LOG_JSON", "1") + +from benchmarks.utils.build_utils import ( # noqa: E402 build_all_images, default_build_output_dir, get_build_parser, ) -from benchmarks.utils.dataset import get_dataset -from openhands.sdk import get_logger +from benchmarks.utils.dataset import get_dataset # noqa: E402 +from openhands.sdk import get_logger # noqa: E402 logger = get_logger(__name__) diff --git a/benchmarks/swtbench/build_images.py b/benchmarks/swtbench/build_images.py index 3fcd2d8de..f44541bad 100644 --- a/benchmarks/swtbench/build_images.py +++ b/benchmarks/swtbench/build_images.py @@ -9,15 +9,21 @@ Note: SWT-bench uses max_workers=16 (vs SWE-bench's 32) via BUILD_DEFAULTS. """ +import os import sys -from benchmarks.swebench.build_images import ( + +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +os.environ.setdefault("LOG_JSON", "1") + +from benchmarks.swebench.build_images import ( # noqa: E402 _wrap_if_needed, collect_unique_base_images, extract_custom_tag, ) -from benchmarks.swtbench.config import BUILD_DEFAULTS -from benchmarks.utils.build_utils import ( +from benchmarks.swtbench.config import BUILD_DEFAULTS # noqa: E402 +from benchmarks.utils.build_utils import ( # noqa: E402 build_all_images, default_build_output_dir, get_build_parser, diff --git a/sitecustomize.py b/sitecustomize.py index 50338d1a8..ade7fba99 100644 --- a/sitecustomize.py +++ b/sitecustomize.py @@ -6,9 +6,15 @@ this file at the repo root guarantees the patch runs before swebench is used. """ +import os import sys +# Disable rich logging to avoid threading issues with multiprocessing. +# Rich's RichHandler creates locks and threads that don't play well with fork(). +# Set this early before any SDK imports happen. +os.environ.setdefault("LOG_JSON", "1") + print("benchmarks sitecustomize imported", file=sys.stderr, flush=True) try: From 31bc5fb57eeeb3157bb886964b7e2a5546fbfaae Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 5 Mar 2026 19:00:31 +0000 Subject: [PATCH 2/6] Use direct assignment instead of setdefault for LOG_JSON Changed os.environ.setdefault('LOG_JSON', '1') to os.environ['LOG_JSON'] = '1' to ensure the environment variable is always set, even if it was previously set to a different value. This ensures Rich logging is disabled regardless of the environment state. Co-authored-by: openhands --- benchmarks/commit0/build_images.py | 2 +- benchmarks/gaia/build_images.py | 2 +- benchmarks/multiswebench/build_images.py | 2 +- benchmarks/swebench/build_images.py | 2 +- benchmarks/swebenchmultimodal/build_images.py | 2 +- benchmarks/swegym/build_images.py | 2 +- benchmarks/swesmith/build_images.py | 2 +- benchmarks/swtbench/build_images.py | 2 +- sitecustomize.py | 3 ++- 9 files changed, 10 insertions(+), 9 deletions(-) diff --git a/benchmarks/commit0/build_images.py b/benchmarks/commit0/build_images.py index 67f85a6c9..02e36a76c 100644 --- a/benchmarks/commit0/build_images.py +++ b/benchmarks/commit0/build_images.py @@ -14,7 +14,7 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ.setdefault("LOG_JSON", "1") +os.environ["LOG_JSON"] = "1" from commit0.harness.constants import SPLIT # noqa: E402 diff --git a/benchmarks/gaia/build_images.py b/benchmarks/gaia/build_images.py index 5139d3c60..dc50c39cb 100644 --- a/benchmarks/gaia/build_images.py +++ b/benchmarks/gaia/build_images.py @@ -17,7 +17,7 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ.setdefault("LOG_JSON", "1") +os.environ["LOG_JSON"] = "1" from benchmarks.utils.build_utils import ( # noqa: E402 BuildOutput, diff --git a/benchmarks/multiswebench/build_images.py b/benchmarks/multiswebench/build_images.py index c9bef4bef..bdb970c38 100644 --- a/benchmarks/multiswebench/build_images.py +++ b/benchmarks/multiswebench/build_images.py @@ -15,7 +15,7 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ.setdefault("LOG_JSON", "1") +os.environ["LOG_JSON"] = "1" from benchmarks.multiswebench.download_dataset import ( download_and_concat_dataset, # noqa: E402 diff --git a/benchmarks/swebench/build_images.py b/benchmarks/swebench/build_images.py index 7a1e69b33..3c1a7544d 100644 --- a/benchmarks/swebench/build_images.py +++ b/benchmarks/swebench/build_images.py @@ -16,7 +16,7 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ.setdefault("LOG_JSON", "1") +os.environ["LOG_JSON"] = "1" from benchmarks.swebench import constants # noqa: E402 from benchmarks.swebench.config import BUILD_DEFAULTS # noqa: E402 diff --git a/benchmarks/swebenchmultimodal/build_images.py b/benchmarks/swebenchmultimodal/build_images.py index 993deae88..4cecf88b5 100644 --- a/benchmarks/swebenchmultimodal/build_images.py +++ b/benchmarks/swebenchmultimodal/build_images.py @@ -14,7 +14,7 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ.setdefault("LOG_JSON", "1") +os.environ["LOG_JSON"] = "1" from benchmarks.swebenchmultimodal.config import BUILD_DEFAULTS # noqa: E402 from benchmarks.utils.build_utils import ( # noqa: E402 diff --git a/benchmarks/swegym/build_images.py b/benchmarks/swegym/build_images.py index feadc38da..5f679d477 100644 --- a/benchmarks/swegym/build_images.py +++ b/benchmarks/swegym/build_images.py @@ -13,7 +13,7 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ.setdefault("LOG_JSON", "1") +os.environ["LOG_JSON"] = "1" from benchmarks.utils.build_utils import ( # noqa: E402 build_all_images, diff --git a/benchmarks/swesmith/build_images.py b/benchmarks/swesmith/build_images.py index 8d7184fd7..d6a36e929 100644 --- a/benchmarks/swesmith/build_images.py +++ b/benchmarks/swesmith/build_images.py @@ -14,7 +14,7 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ.setdefault("LOG_JSON", "1") +os.environ["LOG_JSON"] = "1" from benchmarks.utils.build_utils import ( # noqa: E402 build_all_images, diff --git a/benchmarks/swtbench/build_images.py b/benchmarks/swtbench/build_images.py index f44541bad..1af9c9345 100644 --- a/benchmarks/swtbench/build_images.py +++ b/benchmarks/swtbench/build_images.py @@ -15,7 +15,7 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ.setdefault("LOG_JSON", "1") +os.environ["LOG_JSON"] = "1" from benchmarks.swebench.build_images import ( # noqa: E402 _wrap_if_needed, diff --git a/sitecustomize.py b/sitecustomize.py index ade7fba99..2c37e20e9 100644 --- a/sitecustomize.py +++ b/sitecustomize.py @@ -13,7 +13,8 @@ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). # Set this early before any SDK imports happen. -os.environ.setdefault("LOG_JSON", "1") +# Use direct assignment instead of setdefault to ensure it's always set. +os.environ["LOG_JSON"] = "1" print("benchmarks sitecustomize imported", file=sys.stderr, flush=True) From 32da7d0da86eaf8349410516f001bc1f60cc7c97 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 5 Mar 2026 19:37:13 +0000 Subject: [PATCH 3/6] Refactor: centralize LOG_JSON setting for maintainability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PROBLEM: Previous approach set LOG_JSON in 9+ files (sitecustomize.py + all build_images.py files), making it difficult to maintain. SOLUTION: 1. Centralized LOG_JSON setting in sitecustomize.py (auto-loaded by Python) 2. Removed LOG_JSON from all build_images.py files 3. Added LOG_JSON to GitHub Actions workflows as defense-in-depth BENEFITS: - Single source of truth: sitecustomize.py - Easy to maintain: only 2 logical places (sitecustomize + CI workflows) - Defense in depth: sitecustomize for dev, workflows for CI - Reduced code duplication TESTING: - ✅ All 90 tests pass - ✅ Pre-commit checks pass (ruff, pycodestyle, pyright) - ✅ Verified sitecustomize.py loads automatically with uv run - ✅ Confirmed JsonFormatter is used instead of RichHandler Co-authored-by: openhands --- .github/workflows/build-commit0-images.yml | 1 + .github/workflows/build-gaia-images.yml | 1 + .../workflows/build-multiswebench-images.yml | 1 + .github/workflows/build-swebench-images.yml | 1 + .../build-swebenchmultimodal-images.yml | 1 + .github/workflows/build-swegym-images.yml | 1 + .github/workflows/build-swesmith-images.yml | 1 + .github/workflows/build-swtbench-images.yml | 1 + benchmarks/commit0/build_images.py | 13 ++++--------- benchmarks/gaia/build_images.py | 10 ++-------- benchmarks/multiswebench/build_images.py | 13 +++---------- benchmarks/swebench/build_images.py | 18 ++++++------------ benchmarks/swebenchmultimodal/build_images.py | 14 ++++---------- benchmarks/swegym/build_images.py | 12 +++--------- benchmarks/swesmith/build_images.py | 12 +++--------- benchmarks/swtbench/build_images.py | 12 +++--------- sitecustomize.py | 8 ++++++-- 17 files changed, 42 insertions(+), 78 deletions(-) diff --git a/.github/workflows/build-commit0-images.yml b/.github/workflows/build-commit0-images.yml index 5d821d2e5..72ae42bf6 100644 --- a/.github/workflows/build-commit0-images.yml +++ b/.github/workflows/build-commit0-images.yml @@ -183,6 +183,7 @@ jobs: echo "Running: $CMD" eval "$CMD" env: + LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain diff --git a/.github/workflows/build-gaia-images.yml b/.github/workflows/build-gaia-images.yml index 568e63113..dba24bbdf 100644 --- a/.github/workflows/build-gaia-images.yml +++ b/.github/workflows/build-gaia-images.yml @@ -97,6 +97,7 @@ jobs: echo "✅ GAIA image with MCP layer built and pushed successfully" env: + LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain diff --git a/.github/workflows/build-multiswebench-images.yml b/.github/workflows/build-multiswebench-images.yml index 8999c0a9b..e743b5a0e 100644 --- a/.github/workflows/build-multiswebench-images.yml +++ b/.github/workflows/build-multiswebench-images.yml @@ -248,6 +248,7 @@ jobs: echo "Running: $CMD" eval "$CMD" env: + LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swebench-images.yml b/.github/workflows/build-swebench-images.yml index 6053db373..9b76c6a1f 100644 --- a/.github/workflows/build-swebench-images.yml +++ b/.github/workflows/build-swebench-images.yml @@ -245,6 +245,7 @@ jobs: echo "Running: $CMD" eval "$CMD" env: + LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swebenchmultimodal-images.yml b/.github/workflows/build-swebenchmultimodal-images.yml index 9b11b0ad5..c9e569e6f 100644 --- a/.github/workflows/build-swebenchmultimodal-images.yml +++ b/.github/workflows/build-swebenchmultimodal-images.yml @@ -245,6 +245,7 @@ jobs: echo "Running: $CMD" eval "$CMD" env: + LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swegym-images.yml b/.github/workflows/build-swegym-images.yml index 736ca3a81..2757d55cb 100644 --- a/.github/workflows/build-swegym-images.yml +++ b/.github/workflows/build-swegym-images.yml @@ -241,6 +241,7 @@ jobs: echo "Running: $CMD" eval "$CMD" env: + LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swesmith-images.yml b/.github/workflows/build-swesmith-images.yml index a52a5c9f8..0a95a7b38 100644 --- a/.github/workflows/build-swesmith-images.yml +++ b/.github/workflows/build-swesmith-images.yml @@ -241,6 +241,7 @@ jobs: echo "Running: $CMD" eval "$CMD" env: + LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swtbench-images.yml b/.github/workflows/build-swtbench-images.yml index fe9d07be6..e8e9ffc6a 100644 --- a/.github/workflows/build-swtbench-images.yml +++ b/.github/workflows/build-swtbench-images.yml @@ -179,6 +179,7 @@ jobs: echo "Running: $CMD" eval "$CMD" env: + LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain diff --git a/benchmarks/commit0/build_images.py b/benchmarks/commit0/build_images.py index 02e36a76c..3f24567ec 100644 --- a/benchmarks/commit0/build_images.py +++ b/benchmarks/commit0/build_images.py @@ -11,20 +11,15 @@ import os import sys +from commit0.harness.constants import SPLIT -# Disable rich logging to avoid threading issues with multiprocessing. -# Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ["LOG_JSON"] = "1" - -from commit0.harness.constants import SPLIT # noqa: E402 - -from benchmarks.commit0.config import BUILD_DEFAULTS, INFER_DEFAULTS # noqa: E402 -from benchmarks.utils.build_utils import ( # noqa: E402 +from benchmarks.commit0.config import BUILD_DEFAULTS, INFER_DEFAULTS +from benchmarks.utils.build_utils import ( build_all_images, default_build_output_dir, get_build_parser, ) -from openhands.sdk import get_logger # noqa: E402 +from openhands.sdk import get_logger logger = get_logger(__name__) diff --git a/benchmarks/gaia/build_images.py b/benchmarks/gaia/build_images.py index dc50c39cb..9f6b90cd1 100644 --- a/benchmarks/gaia/build_images.py +++ b/benchmarks/gaia/build_images.py @@ -10,16 +10,10 @@ --image ghcr.io/openhands/eval-agent-server --target binary-minimal --push """ -import os import sys from pathlib import Path - -# Disable rich logging to avoid threading issues with multiprocessing. -# Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ["LOG_JSON"] = "1" - -from benchmarks.utils.build_utils import ( # noqa: E402 +from benchmarks.utils.build_utils import ( BuildOutput, _get_sdk_submodule_info, build_all_images, @@ -27,7 +21,7 @@ get_build_parser, run_docker_build_layer, ) -from openhands.sdk import get_logger # noqa: E402 +from openhands.sdk import get_logger logger = get_logger(__name__) diff --git a/benchmarks/multiswebench/build_images.py b/benchmarks/multiswebench/build_images.py index bdb970c38..6c3ef3b9f 100644 --- a/benchmarks/multiswebench/build_images.py +++ b/benchmarks/multiswebench/build_images.py @@ -12,20 +12,13 @@ import os from pathlib import Path - -# Disable rich logging to avoid threading issues with multiprocessing. -# Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ["LOG_JSON"] = "1" - -from benchmarks.multiswebench.download_dataset import ( - download_and_concat_dataset, # noqa: E402 -) -from benchmarks.utils.build_utils import ( # noqa: E402 +from benchmarks.multiswebench.download_dataset import download_and_concat_dataset +from benchmarks.utils.build_utils import ( build_all_images, default_build_output_dir, get_build_parser, ) -from openhands.sdk import get_logger # noqa: E402 +from openhands.sdk import get_logger logger = get_logger(__name__) diff --git a/benchmarks/swebench/build_images.py b/benchmarks/swebench/build_images.py index 3c1a7544d..5ace5419b 100644 --- a/benchmarks/swebench/build_images.py +++ b/benchmarks/swebench/build_images.py @@ -9,27 +9,21 @@ --image ghcr.io/openhands/eval-agent-server --target source-minimal """ -import os import sys from pathlib import Path - -# Disable rich logging to avoid threading issues with multiprocessing. -# Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ["LOG_JSON"] = "1" - -from benchmarks.swebench import constants # noqa: E402 -from benchmarks.swebench.config import BUILD_DEFAULTS # noqa: E402 -from benchmarks.utils.build_utils import ( # noqa: E402 +from benchmarks.swebench import constants +from benchmarks.swebench.config import BUILD_DEFAULTS +from benchmarks.utils.build_utils import ( BuildOutput, build_all_images, default_build_output_dir, get_build_parser, run_docker_build_layer, ) -from benchmarks.utils.dataset import get_dataset # noqa: E402 -from benchmarks.utils.image_utils import remote_image_exists # noqa: E402 -from openhands.sdk import get_logger # noqa: E402 +from benchmarks.utils.dataset import get_dataset +from benchmarks.utils.image_utils import remote_image_exists +from openhands.sdk import get_logger logger = get_logger(__name__) diff --git a/benchmarks/swebenchmultimodal/build_images.py b/benchmarks/swebenchmultimodal/build_images.py index 4cecf88b5..987cf7bda 100644 --- a/benchmarks/swebenchmultimodal/build_images.py +++ b/benchmarks/swebenchmultimodal/build_images.py @@ -8,22 +8,16 @@ --image ghcr.io/openhands/eval-agent-server --target source-minimal """ -import os import sys - -# Disable rich logging to avoid threading issues with multiprocessing. -# Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ["LOG_JSON"] = "1" - -from benchmarks.swebenchmultimodal.config import BUILD_DEFAULTS # noqa: E402 -from benchmarks.utils.build_utils import ( # noqa: E402 +from benchmarks.swebenchmultimodal.config import BUILD_DEFAULTS +from benchmarks.utils.build_utils import ( build_all_images, default_build_output_dir, get_build_parser, ) -from benchmarks.utils.dataset import get_dataset # noqa: E402 -from openhands.sdk import get_logger # noqa: E402 +from benchmarks.utils.dataset import get_dataset +from openhands.sdk import get_logger logger = get_logger(__name__) diff --git a/benchmarks/swegym/build_images.py b/benchmarks/swegym/build_images.py index 5f679d477..6c116a020 100644 --- a/benchmarks/swegym/build_images.py +++ b/benchmarks/swegym/build_images.py @@ -7,21 +7,15 @@ --image ghcr.io/openhands/eval-agent-server --target source-minimal """ -import os import sys - -# Disable rich logging to avoid threading issues with multiprocessing. -# Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ["LOG_JSON"] = "1" - -from benchmarks.utils.build_utils import ( # noqa: E402 +from benchmarks.utils.build_utils import ( build_all_images, default_build_output_dir, get_build_parser, ) -from benchmarks.utils.dataset import get_dataset # noqa: E402 -from openhands.sdk import get_logger # noqa: E402 +from benchmarks.utils.dataset import get_dataset +from openhands.sdk import get_logger logger = get_logger(__name__) diff --git a/benchmarks/swesmith/build_images.py b/benchmarks/swesmith/build_images.py index d6a36e929..51fda90ae 100644 --- a/benchmarks/swesmith/build_images.py +++ b/benchmarks/swesmith/build_images.py @@ -8,21 +8,15 @@ --image ghcr.io/openhands/eval-agent-server --target source-minimal """ -import os import sys - -# Disable rich logging to avoid threading issues with multiprocessing. -# Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ["LOG_JSON"] = "1" - -from benchmarks.utils.build_utils import ( # noqa: E402 +from benchmarks.utils.build_utils import ( build_all_images, default_build_output_dir, get_build_parser, ) -from benchmarks.utils.dataset import get_dataset # noqa: E402 -from openhands.sdk import get_logger # noqa: E402 +from benchmarks.utils.dataset import get_dataset +from openhands.sdk import get_logger logger = get_logger(__name__) diff --git a/benchmarks/swtbench/build_images.py b/benchmarks/swtbench/build_images.py index 1af9c9345..3fcd2d8de 100644 --- a/benchmarks/swtbench/build_images.py +++ b/benchmarks/swtbench/build_images.py @@ -9,21 +9,15 @@ Note: SWT-bench uses max_workers=16 (vs SWE-bench's 32) via BUILD_DEFAULTS. """ -import os import sys - -# Disable rich logging to avoid threading issues with multiprocessing. -# Rich's RichHandler creates locks and threads that don't play well with fork(). -os.environ["LOG_JSON"] = "1" - -from benchmarks.swebench.build_images import ( # noqa: E402 +from benchmarks.swebench.build_images import ( _wrap_if_needed, collect_unique_base_images, extract_custom_tag, ) -from benchmarks.swtbench.config import BUILD_DEFAULTS # noqa: E402 -from benchmarks.utils.build_utils import ( # noqa: E402 +from benchmarks.swtbench.config import BUILD_DEFAULTS +from benchmarks.utils.build_utils import ( build_all_images, default_build_output_dir, get_build_parser, diff --git a/sitecustomize.py b/sitecustomize.py index 2c37e20e9..b20ed1c63 100644 --- a/sitecustomize.py +++ b/sitecustomize.py @@ -10,10 +10,14 @@ import sys +# ============================================================================ +# CENTRALIZED LOG_JSON SETTING +# ============================================================================ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -# Set this early before any SDK imports happen. -# Use direct assignment instead of setdefault to ensure it's always set. +# This is set here (sitecustomize.py) as the single source of truth, which is +# automatically loaded by Python when this directory is on sys.path. +# For defense in depth, GitHub Actions workflows also set LOG_JSON=1 explicitly. os.environ["LOG_JSON"] = "1" print("benchmarks sitecustomize imported", file=sys.stderr, flush=True) From dac2f730635b0d9a16d1c008633019430df1a4a5 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 5 Mar 2026 22:38:41 +0000 Subject: [PATCH 4/6] Fix: Move LOG_JSON to workflow-level env to ensure it's set before any Python runs The issue was that LOG_JSON was only set in step-level env sections, which meant it wasn't available during 'make build' or other early steps. This caused Rich logging to be initialized, creating threads that deadlock with multiprocessing. Changes: - Moved LOG_JSON='1' from step-level env to global workflow env in all build workflows - This ensures LOG_JSON is set before any Python process starts, preventing Rich logging initialization - Removed redundant step-level LOG_JSON settings to avoid duplication Affected workflows: - build-swebench-images.yml - build-multiswebench-images.yml - build-swebenchmultimodal-images.yml - build-swegym-images.yml - build-swesmith-images.yml - build-commit0-images.yml - build-gaia-images.yml - build-swtbench-images.yml Co-authored-by: openhands --- .github/workflows/build-commit0-images.yml | 5 ++--- .github/workflows/build-gaia-images.yml | 7 ++++--- .github/workflows/build-multiswebench-images.yml | 2 +- .github/workflows/build-swebench-images.yml | 2 +- .github/workflows/build-swebenchmultimodal-images.yml | 2 +- .github/workflows/build-swegym-images.yml | 5 ++--- .github/workflows/build-swesmith-images.yml | 5 ++--- .github/workflows/build-swtbench-images.yml | 7 ++++--- 8 files changed, 17 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build-commit0-images.yml b/.github/workflows/build-commit0-images.yml index 72ae42bf6..d54c668f8 100644 --- a/.github/workflows/build-commit0-images.yml +++ b/.github/workflows/build-commit0-images.yml @@ -58,6 +58,7 @@ env: N_LIMIT: '' INSTANCE_IDS: '' SELECT_FILE: '' + LOG_JSON: '1' # Disable rich logging to prevent multiprocessing deadlock concurrency: group: build-commit0-${{ github.ref }} @@ -182,9 +183,7 @@ jobs: echo "Running: $CMD" eval "$CMD" - env: - LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock - DOCKER_BUILDKIT: 1 + env: DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain - name: Archive build logs diff --git a/.github/workflows/build-gaia-images.yml b/.github/workflows/build-gaia-images.yml index dba24bbdf..d443acb65 100644 --- a/.github/workflows/build-gaia-images.yml +++ b/.github/workflows/build-gaia-images.yml @@ -18,6 +18,9 @@ concurrency: group: build-gaia-${{ github.ref }} cancel-in-progress: false +env: + LOG_JSON: '1' # Disable rich logging to prevent multiprocessing deadlock + jobs: build-and-push: if: > @@ -96,9 +99,7 @@ jobs: eval "$CMD" echo "✅ GAIA image with MCP layer built and pushed successfully" - env: - LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock - DOCKER_BUILDKIT: 1 + env: DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain - name: Archive build logs diff --git a/.github/workflows/build-multiswebench-images.yml b/.github/workflows/build-multiswebench-images.yml index e743b5a0e..ff969eac1 100644 --- a/.github/workflows/build-multiswebench-images.yml +++ b/.github/workflows/build-multiswebench-images.yml @@ -64,6 +64,7 @@ env: BUILD_BATCH_SIZE: '15' BUILDKIT_PRUNE_KEEP_GB: '60' BUILDKIT_PRUNE_THRESHOLD_PCT: '60' + LOG_JSON: '1' # Disable rich logging to prevent multiprocessing deadlock concurrency: group: build-multiswebench-${{ github.ref }} @@ -248,7 +249,6 @@ jobs: echo "Running: $CMD" eval "$CMD" env: - LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swebench-images.yml b/.github/workflows/build-swebench-images.yml index 9b76c6a1f..88c697ed0 100644 --- a/.github/workflows/build-swebench-images.yml +++ b/.github/workflows/build-swebench-images.yml @@ -62,6 +62,7 @@ env: BUILD_BATCH_SIZE: '15' BUILDKIT_PRUNE_KEEP_GB: '60' BUILDKIT_PRUNE_THRESHOLD_PCT: '60' + LOG_JSON: '1' # Disable rich logging to prevent multiprocessing deadlock concurrency: group: build-swe-bench-${{ github.ref }} @@ -245,7 +246,6 @@ jobs: echo "Running: $CMD" eval "$CMD" env: - LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swebenchmultimodal-images.yml b/.github/workflows/build-swebenchmultimodal-images.yml index c9e569e6f..6ff080583 100644 --- a/.github/workflows/build-swebenchmultimodal-images.yml +++ b/.github/workflows/build-swebenchmultimodal-images.yml @@ -62,6 +62,7 @@ env: BUILD_BATCH_SIZE: '15' BUILDKIT_PRUNE_KEEP_GB: '60' BUILDKIT_PRUNE_THRESHOLD_PCT: '60' + LOG_JSON: '1' # Disable rich logging to prevent multiprocessing deadlock concurrency: group: build-swe-bench-multimodal-${{ github.ref }} @@ -245,7 +246,6 @@ jobs: echo "Running: $CMD" eval "$CMD" env: - LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swegym-images.yml b/.github/workflows/build-swegym-images.yml index 2757d55cb..e920d1532 100644 --- a/.github/workflows/build-swegym-images.yml +++ b/.github/workflows/build-swegym-images.yml @@ -58,6 +58,7 @@ env: BUILD_BATCH_SIZE: '15' BUILDKIT_PRUNE_KEEP_GB: '60' BUILDKIT_PRUNE_THRESHOLD_PCT: '60' + LOG_JSON: '1' # Disable rich logging to prevent multiprocessing deadlock concurrency: group: build-swe-gym-${{ github.ref }} @@ -240,9 +241,7 @@ jobs: echo "Running: $CMD" eval "$CMD" - env: - LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock - DOCKER_BUILDKIT: 1 + env: DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swesmith-images.yml b/.github/workflows/build-swesmith-images.yml index 0a95a7b38..524c3981a 100644 --- a/.github/workflows/build-swesmith-images.yml +++ b/.github/workflows/build-swesmith-images.yml @@ -58,6 +58,7 @@ env: BUILD_BATCH_SIZE: '15' BUILDKIT_PRUNE_KEEP_GB: '60' BUILDKIT_PRUNE_THRESHOLD_PCT: '60' + LOG_JSON: '1' # Disable rich logging to prevent multiprocessing deadlock concurrency: group: build-swe-smith-${{ github.ref }} @@ -240,9 +241,7 @@ jobs: echo "Running: $CMD" eval "$CMD" - env: - LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock - DOCKER_BUILDKIT: 1 + env: DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain BUILDKIT_RESET_ON_FAILURE: 1 diff --git a/.github/workflows/build-swtbench-images.yml b/.github/workflows/build-swtbench-images.yml index e8e9ffc6a..732fae429 100644 --- a/.github/workflows/build-swtbench-images.yml +++ b/.github/workflows/build-swtbench-images.yml @@ -63,6 +63,9 @@ concurrency: group: build-swt-bench-${{ github.ref }} cancel-in-progress: false +env: + LOG_JSON: '1' # Disable rich logging to prevent multiprocessing deadlock + jobs: build-and-push: if: > @@ -178,9 +181,7 @@ jobs: echo "Running: $CMD" eval "$CMD" - env: - LOG_JSON: "1" # Disable rich logging to prevent multiprocessing deadlock - DOCKER_BUILDKIT: 1 + env: DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain - name: Build prebaked eval env images From a5177af0046adcf675ef64a2f05490d923b32fd8 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 5 Mar 2026 23:00:27 +0000 Subject: [PATCH 5/6] Suppress verbose build stderr warnings from uv build The SDK logs every file copy operation from 'uv build --sdist' as a WARNING, resulting in 75k+ log lines (vs 6.6k before). This fix keeps LOG_JSON=1 for thread safety but sets the SDK build logger to ERROR level, filtering out the file copy warnings while preserving important error messages. Changes: - sitecustomize.py: Updated comment to document logging approach - build_images.py (7 files): Added logger config after SDK import to set openhands.agent_server.docker.build logger to ERROR level This addresses the excessive logging issue reported in PR #487 while maintaining the multiprocessing deadlock fix. Co-authored-by: openhands --- benchmarks/commit0/build_images.py | 5 +++++ benchmarks/gaia/build_images.py | 5 +++++ benchmarks/multiswebench/build_images.py | 5 +++++ benchmarks/swebench/build_images.py | 5 +++++ benchmarks/swebenchmultimodal/build_images.py | 5 +++++ benchmarks/swegym/build_images.py | 5 +++++ benchmarks/swesmith/build_images.py | 5 +++++ sitecustomize.py | 9 +++++---- 8 files changed, 40 insertions(+), 4 deletions(-) diff --git a/benchmarks/commit0/build_images.py b/benchmarks/commit0/build_images.py index 3f24567ec..09bf4125b 100644 --- a/benchmarks/commit0/build_images.py +++ b/benchmarks/commit0/build_images.py @@ -21,6 +21,11 @@ ) from openhands.sdk import get_logger +# Suppress verbose build stderr warnings (file copy progress from uv build) +# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) +import logging + +logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) DEFAULT_DOCKER_IMAGE_PREFIX = "docker.io/wentingzhao/" diff --git a/benchmarks/gaia/build_images.py b/benchmarks/gaia/build_images.py index 9f6b90cd1..2897fbdc1 100644 --- a/benchmarks/gaia/build_images.py +++ b/benchmarks/gaia/build_images.py @@ -23,6 +23,11 @@ ) from openhands.sdk import get_logger +# Suppress verbose build stderr warnings (file copy progress from uv build) +# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) +import logging + +logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/benchmarks/multiswebench/build_images.py b/benchmarks/multiswebench/build_images.py index 6c3ef3b9f..f6546853d 100644 --- a/benchmarks/multiswebench/build_images.py +++ b/benchmarks/multiswebench/build_images.py @@ -20,6 +20,11 @@ ) from openhands.sdk import get_logger +# Suppress verbose build stderr warnings (file copy progress from uv build) +# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) +import logging + +logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/benchmarks/swebench/build_images.py b/benchmarks/swebench/build_images.py index 5ace5419b..be8ec3d6f 100644 --- a/benchmarks/swebench/build_images.py +++ b/benchmarks/swebench/build_images.py @@ -25,6 +25,11 @@ from benchmarks.utils.image_utils import remote_image_exists from openhands.sdk import get_logger +# Suppress verbose build stderr warnings (file copy progress from uv build) +# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) +import logging + +logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) WRAPPER_DOCKERFILE = Path(__file__).with_name("Dockerfile.swebench-deps") diff --git a/benchmarks/swebenchmultimodal/build_images.py b/benchmarks/swebenchmultimodal/build_images.py index 987cf7bda..0d66cfd2f 100644 --- a/benchmarks/swebenchmultimodal/build_images.py +++ b/benchmarks/swebenchmultimodal/build_images.py @@ -19,6 +19,11 @@ from benchmarks.utils.dataset import get_dataset from openhands.sdk import get_logger +# Suppress verbose build stderr warnings (file copy progress from uv build) +# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) +import logging + +logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/benchmarks/swegym/build_images.py b/benchmarks/swegym/build_images.py index 6c116a020..972863435 100644 --- a/benchmarks/swegym/build_images.py +++ b/benchmarks/swegym/build_images.py @@ -17,6 +17,11 @@ from benchmarks.utils.dataset import get_dataset from openhands.sdk import get_logger +# Suppress verbose build stderr warnings (file copy progress from uv build) +# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) +import logging + +logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/benchmarks/swesmith/build_images.py b/benchmarks/swesmith/build_images.py index 51fda90ae..a06aa09bf 100644 --- a/benchmarks/swesmith/build_images.py +++ b/benchmarks/swesmith/build_images.py @@ -18,6 +18,11 @@ from benchmarks.utils.dataset import get_dataset from openhands.sdk import get_logger +# Suppress verbose build stderr warnings (file copy progress from uv build) +# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) +import logging + +logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/sitecustomize.py b/sitecustomize.py index b20ed1c63..09bea030d 100644 --- a/sitecustomize.py +++ b/sitecustomize.py @@ -11,15 +11,16 @@ # ============================================================================ -# CENTRALIZED LOG_JSON SETTING +# CENTRALIZED LOGGING CONFIGURATION # ============================================================================ # Disable rich logging to avoid threading issues with multiprocessing. # Rich's RichHandler creates locks and threads that don't play well with fork(). -# This is set here (sitecustomize.py) as the single source of truth, which is -# automatically loaded by Python when this directory is on sys.path. -# For defense in depth, GitHub Actions workflows also set LOG_JSON=1 explicitly. os.environ["LOG_JSON"] = "1" +# Note: Verbose build stderr warnings (75k+ lines from uv build file copying) +# are suppressed in individual build_images.py files by setting the SDK build +# logger to ERROR level after SDK imports. + print("benchmarks sitecustomize imported", file=sys.stderr, flush=True) try: From 17ed9f6e166edc85d14ef720f4f79cecdfa2e459 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 7 Mar 2026 00:01:03 +0000 Subject: [PATCH 6/6] Fix multiprocessing logging: Configure logger suppression in sitecustomize.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root Cause: - LOG_JSON=1 switches from Rich to JSON logging to avoid threading issues - SDK logs all Docker stderr as WARNING (~75k lines from uv build copying files) - Rich logging throttles/filters these automatically - JSON logging outputs EVERY WARNING line → 3x slowdown Why Previous Fix Failed: - Logger level was set in build_images.py AFTER SDK import - With ProcessPoolExecutor + fork(), child processes get their own logging config - Parent's logger level changes don't propagate to forked children - Each child process still had default WARNING level Solution: - Configure logger level in sitecustomize.py (auto-imported at Python startup) - Affects ALL processes including forked children - Remove redundant logger configuration from individual build_images.py files Verified: - Build logger level correctly set to ERROR in all processes - WARNING messages suppressed, ERROR messages still logged - Pre-commit checks pass Co-authored-by: openhands --- benchmarks/commit0/build_images.py | 5 ----- benchmarks/gaia/build_images.py | 5 ----- benchmarks/multiswebench/build_images.py | 5 ----- benchmarks/swebench/build_images.py | 5 ----- benchmarks/swebenchmultimodal/build_images.py | 5 ----- benchmarks/swegym/build_images.py | 5 ----- benchmarks/swesmith/build_images.py | 5 ----- sitecustomize.py | 20 +++++++++++++++---- 8 files changed, 16 insertions(+), 39 deletions(-) diff --git a/benchmarks/commit0/build_images.py b/benchmarks/commit0/build_images.py index 09bf4125b..3f24567ec 100644 --- a/benchmarks/commit0/build_images.py +++ b/benchmarks/commit0/build_images.py @@ -21,11 +21,6 @@ ) from openhands.sdk import get_logger -# Suppress verbose build stderr warnings (file copy progress from uv build) -# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) -import logging - -logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) DEFAULT_DOCKER_IMAGE_PREFIX = "docker.io/wentingzhao/" diff --git a/benchmarks/gaia/build_images.py b/benchmarks/gaia/build_images.py index 2897fbdc1..9f6b90cd1 100644 --- a/benchmarks/gaia/build_images.py +++ b/benchmarks/gaia/build_images.py @@ -23,11 +23,6 @@ ) from openhands.sdk import get_logger -# Suppress verbose build stderr warnings (file copy progress from uv build) -# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) -import logging - -logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/benchmarks/multiswebench/build_images.py b/benchmarks/multiswebench/build_images.py index f6546853d..6c3ef3b9f 100644 --- a/benchmarks/multiswebench/build_images.py +++ b/benchmarks/multiswebench/build_images.py @@ -20,11 +20,6 @@ ) from openhands.sdk import get_logger -# Suppress verbose build stderr warnings (file copy progress from uv build) -# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) -import logging - -logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/benchmarks/swebench/build_images.py b/benchmarks/swebench/build_images.py index be8ec3d6f..5ace5419b 100644 --- a/benchmarks/swebench/build_images.py +++ b/benchmarks/swebench/build_images.py @@ -25,11 +25,6 @@ from benchmarks.utils.image_utils import remote_image_exists from openhands.sdk import get_logger -# Suppress verbose build stderr warnings (file copy progress from uv build) -# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) -import logging - -logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) WRAPPER_DOCKERFILE = Path(__file__).with_name("Dockerfile.swebench-deps") diff --git a/benchmarks/swebenchmultimodal/build_images.py b/benchmarks/swebenchmultimodal/build_images.py index 0d66cfd2f..987cf7bda 100644 --- a/benchmarks/swebenchmultimodal/build_images.py +++ b/benchmarks/swebenchmultimodal/build_images.py @@ -19,11 +19,6 @@ from benchmarks.utils.dataset import get_dataset from openhands.sdk import get_logger -# Suppress verbose build stderr warnings (file copy progress from uv build) -# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) -import logging - -logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/benchmarks/swegym/build_images.py b/benchmarks/swegym/build_images.py index 972863435..6c116a020 100644 --- a/benchmarks/swegym/build_images.py +++ b/benchmarks/swegym/build_images.py @@ -17,11 +17,6 @@ from benchmarks.utils.dataset import get_dataset from openhands.sdk import get_logger -# Suppress verbose build stderr warnings (file copy progress from uv build) -# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) -import logging - -logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/benchmarks/swesmith/build_images.py b/benchmarks/swesmith/build_images.py index a06aa09bf..51fda90ae 100644 --- a/benchmarks/swesmith/build_images.py +++ b/benchmarks/swesmith/build_images.py @@ -18,11 +18,6 @@ from benchmarks.utils.dataset import get_dataset from openhands.sdk import get_logger -# Suppress verbose build stderr warnings (file copy progress from uv build) -# The SDK logs every "[stderr] copying..." line as WARNING (75k+ lines total) -import logging - -logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) logger = get_logger(__name__) diff --git a/sitecustomize.py b/sitecustomize.py index 09bea030d..de030efd4 100644 --- a/sitecustomize.py +++ b/sitecustomize.py @@ -17,12 +17,24 @@ # Rich's RichHandler creates locks and threads that don't play well with fork(). os.environ["LOG_JSON"] = "1" -# Note: Verbose build stderr warnings (75k+ lines from uv build file copying) -# are suppressed in individual build_images.py files by setting the SDK build -# logger to ERROR level after SDK imports. - print("benchmarks sitecustomize imported", file=sys.stderr, flush=True) +# Suppress verbose Docker build stderr warnings (75k+ lines from uv copying files). +# This MUST be done in sitecustomize.py to affect forked child processes. +# The SDK logs every "[stderr] copying..." line as WARNING, causing massive log output +# and 3x slowdown with JSON logging (Rich logging throttled these automatically). +try: + import logging + + # Import SDK logger to trigger auto-configuration with LOG_JSON=1 + from openhands.sdk.logger import get_logger # noqa: F401 + + # Now suppress the verbose build logger in ALL processes (including forked children) + logging.getLogger("openhands.agent_server.docker.build").setLevel(logging.ERROR) +except Exception: + # Best-effort: don't break if SDK structure changes + pass + try: # Reuse the actual patch logic that lives alongside the benchmarks package. from benchmarks.utils.sitecustomize import _apply_modal_logging_patch