From 2b2c0a5b1b1d462199705d2eaee4f385a3973c17 Mon Sep 17 00:00:00 2001
From: Rakhman Asmatullayev <rakhman@asmatullayev.dev>
Date: Wed, 4 Mar 2026 12:59:55 -0300
Subject: [PATCH 1/2] Fix: create evaluation span in child process to exclude
 queue wait time

Previously, the root "Evaluation" span was created in the parent process
alongside the datapoint, before a worker picked up the task. When more
tasks were queued than available workers, the span included idle wait
time, resulting in inflated durations and misleading timelines in Laminar.

Now, the parent still registers the datapoint up front (without a trace
ID), while the "Evaluation" span is created in the child process when
execution actually begins. The span's trace ID is then linked back to the
Laminar datapoint via update_datapoint, so the timeline accurately
reflects real execution time. Requires lmnr>=0.7.41 for update_datapoint
trace ID support.

Co-Authored-By: Simon Rosenberg <simon@rosenberg.dev>
---
 benchmarks/utils/evaluation.py  | 301 ++++++++++++++++++--------------
 benchmarks/utils/laminar.py     |  61 ++++---
 pyproject.toml                  |   2 +-
 tests/test_workspace_cleanup.py |   8 +-
 4 files changed, 211 insertions(+), 161 deletions(-)

diff --git a/benchmarks/utils/evaluation.py b/benchmarks/utils/evaluation.py
index bb1a9b68a..750736ece 100644
--- a/benchmarks/utils/evaluation.py
+++ b/benchmarks/utils/evaluation.py
@@ -14,7 +14,7 @@
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Callable, List, Optional, Tuple
+from typing import Any, Callable, List, Optional, Tuple
 from uuid import UUID
 
 from lmnr import Laminar
@@ -384,19 +384,20 @@ def attempt_on_result(instance: EvalInstance, out: EvalOutput) -> None:
             pending_instances: dict[Future, PendingInstance] = {}
             try:
                 for index, inst in enumerate(instances_to_process):
-                    datapoint_id, lmnr_span_ctx = (
-                        LaminarService.get().create_evaluation_datapoint(
-                            self.metadata.lmnr.eval_id,
-                            inst.id,
-                            self.metadata.model_dump(mode="json"),
-                            index,
-                            session_id=self._laminar_session_id,
-                            trace_metadata=self._laminar_trace_meta,
-                        )
+                    datapoint_id = LaminarService.get().create_evaluation_datapoint(
+                        self.metadata.lmnr.eval_id,
+                        inst.id,
+                        self.metadata.model_dump(mode="json"),
+                        index,
                     )
 
                     fut = pool.submit(
-                        self._process_one_mp, inst, lmnr_span_ctx, attempt
+                        self._process_one_mp,
+                        inst,
+                        attempt,
+                        lmnr_session_id=self._laminar_session_id,
+                        lmnr_trace_metadata=self._laminar_trace_meta,
+                        lmnr_datapoint_id=datapoint_id,
                     )
                     futures.append(fut)
                     pending_instances[fut] = PendingInstance(
@@ -572,7 +573,12 @@ def _calculate_resource_factor(self, runtime_failure_count: int) -> int:
 
     # --- Worker-side method (executed in child processes) ---------------------------
     def _process_one_mp(
-        self, instance: EvalInstance, eval_span_ctx: str | None, critic_attempt: int
+        self,
+        instance: EvalInstance,
+        critic_attempt: int,
+        lmnr_session_id: str | None = None,
+        lmnr_trace_metadata: dict[str, Any] | None = None,
+        lmnr_datapoint_id: UUID | None = None,
     ) -> Tuple[EvalInstance, EvalOutput]:
         """Execute one instance in a child process with retry logic.
 
@@ -593,147 +599,176 @@ def _process_one_mp(
         with redirect_stdout_stderr(log_file):
             logger.info("[child] start id=%s", instance.id)
 
+            # Create root "Evaluation" span in the child so the timeline
+            # reflects actual execution start, then update the datapoint
+            # with the span's trace_id.
+            eval_span = Laminar.start_active_span(
+                "Evaluation",
+                span_type="EVALUATION",  # type: ignore
+                session_id=lmnr_session_id,
+                metadata=lmnr_trace_metadata,
+            )
+            eval_span_ctx = Laminar.get_laminar_span_context(eval_span)
+
+            if lmnr_datapoint_id is not None and self.metadata.lmnr is not None:
+                trace_id = UUID(int=eval_span.get_span_context().trace_id)
+                LaminarService.get().initialize()
+                LaminarService.get().update_datapoint_trace_id(
+                    eval_id=self.metadata.lmnr.eval_id,
+                    datapoint_id=lmnr_datapoint_id,
+                    trace_id=trace_id,
+                )
+
             retry_count = 0
             runtime_failure_count = 0
             last_error = None
             max_retries = self.metadata.max_retries
             runtime_runs: list[RemoteRuntimeAllocation] = []
 
-            while retry_count <= max_retries:
-                workspace = None
-
-                # Start Laminar execution span and inject context into os.environ so workspace can pick it up
-                # Escape the serialized context to safely pass as a cli argument
-                lmnr_span = Laminar.start_active_span(
-                    "Execution",
-                    span_type="EXECUTOR",  # type: ignore
-                    parent_span_context=Laminar.deserialize_span_context(eval_span_ctx)
-                    if eval_span_ctx
-                    else None,
-                )
-                exec_span_ctx = json.dumps(Laminar.serialize_span_context(lmnr_span))
-                os.environ["LMNR_SPAN_CONTEXT"] = exec_span_ctx or ""
-
-                try:
-                    # Calculate resource factor based on runtime failures
-                    resource_factor = self._calculate_resource_factor(
-                        runtime_failure_count
+            try:
+                while retry_count <= max_retries:
+                    workspace = None
+
+                    # Start Laminar execution span and inject context into os.environ so workspace can pick it up
+                    # Escape the serialized context to safely pass as a cli argument
+                    exec_span = Laminar.start_active_span(
+                        "Execution",
+                        span_type="EXECUTOR",  # type: ignore
+                        parent_span_context=eval_span_ctx,
                     )
-                    if runtime_failure_count > 0:
-                        logger.warning(
-                            f"[child] Instance {instance.id}: "
-                            f"attempt {retry_count + 1}/{max_retries + 1}, "
-                            f"runtime_failure_count={runtime_failure_count}, "
-                            f"resource_factor={resource_factor}"
-                        )
-
-                    workspace = self.prepare_workspace(
-                        instance,
-                        resource_factor=resource_factor,
-                        forward_env=LMNR_ENV_VARS,
+                    exec_span_ctx = json.dumps(
+                        Laminar.serialize_span_context(exec_span)
                     )
+                    os.environ["LMNR_SPAN_CONTEXT"] = exec_span_ctx or ""
 
-                    # Record runtime/pod mapping only for remote runtimes
-                    if isinstance(workspace, APIRemoteWorkspace):
-                        retry_number = retry_count + 1  # 1-indexed for readability
-                        runtime_run = RemoteRuntimeAllocation(
-                            runtime_id=getattr(workspace, "_runtime_id", None),
-                            session_id=getattr(workspace, "session_id", None),
-                            runtime_url=getattr(workspace, "_runtime_url", None),
-                            resource_factor=resource_factor,
-                            critic_attempt=critic_attempt,
-                            retry=retry_number,
-                            started_at=datetime.now(timezone.utc),
-                        )
-                        runtime_runs.append(runtime_run)
-                        logger.info(
-                            "[child] runtime allocated instance=%s attempt=%d retry=%d workspace=%s runtime_id=%s session_id=%s resource_factor=%s",
-                            instance.id,
-                            critic_attempt,
-                            retry_number,
-                            workspace.__class__.__name__,
-                            runtime_run.runtime_id,
-                            runtime_run.session_id,
-                            runtime_run.resource_factor,
+                    try:
+                        # Calculate resource factor based on runtime failures
+                        resource_factor = self._calculate_resource_factor(
+                            runtime_failure_count
                         )
-                    out = self.evaluate_instance(instance, workspace)
-                    if runtime_runs:
-                        out.runtime_runs = runtime_runs
-                    logger.info("[child] done id=%s", instance.id)
-                    return instance, out
-                except Exception as e:
-                    last_error = e
-                    retry_count += 1
-                    lmnr_span.record_exception(e)
+                        if runtime_failure_count > 0:
+                            logger.warning(
+                                f"[child] Instance {instance.id}: "
+                                f"attempt {retry_count + 1}/{max_retries + 1}, "
+                                f"runtime_failure_count={runtime_failure_count}, "
+                                f"resource_factor={resource_factor}"
+                            )
 
-                    # Log structured runtime allocation/init failures so we can trace instance -> runtime/pod
-                    runtime_id = (
-                        getattr(workspace, "_runtime_id", None) if workspace else None
-                    )
-                    session_id = (
-                        getattr(workspace, "session_id", None) if workspace else None
-                    )
-                    if isinstance(workspace, APIRemoteWorkspace) or (
-                        "Runtime not yet ready" in str(e)
-                    ):
-                        logger.warning(
-                            "[child] runtime init failure instance=%s attempt=%d retry=%d runtime_id=%s session_id=%s error=%s",
-                            instance.id,
-                            critic_attempt,
-                            retry_count,
-                            runtime_id,
-                            session_id,
-                            str(e),
+                        workspace = self.prepare_workspace(
+                            instance,
+                            resource_factor=resource_factor,
+                            forward_env=LMNR_ENV_VARS,
                         )
 
-                    # TODO(#277): add an exception classifier to decide when to bump resources
-                    runtime_failure_count += 1
-                    logger.warning(
-                        f"[child] Instance {instance.id}: runtime_failure_count="
-                        f"{runtime_failure_count}"
-                    )
-
-                    if retry_count <= max_retries:
-                        logger.warning(
-                            f"[child] Instance {instance.id} failed "
-                            f"(attempt {retry_count}/{max_retries}): "
-                            f"{str(e)}"
-                        )
-                    else:
-                        logger.error(
-                            f"[child] Instance {instance.id} failed after "
-                            f"{max_retries} retries. Last error: {str(e)}",
-                            exc_info=True,
+                        # Record runtime/pod mapping only for remote runtimes
+                        if isinstance(workspace, APIRemoteWorkspace):
+                            retry_number = retry_count + 1  # 1-indexed for readability
+                            runtime_run = RemoteRuntimeAllocation(
+                                runtime_id=getattr(workspace, "_runtime_id", None),
+                                session_id=getattr(workspace, "session_id", None),
+                                runtime_url=getattr(workspace, "_runtime_url", None),
+                                resource_factor=resource_factor,
+                                critic_attempt=critic_attempt,
+                                retry=retry_number,
+                                started_at=datetime.now(timezone.utc),
+                            )
+                            runtime_runs.append(runtime_run)
+                            logger.info(
+                                "[child] runtime allocated instance=%s attempt=%d retry=%d workspace=%s runtime_id=%s session_id=%s resource_factor=%s",
+                                instance.id,
+                                critic_attempt,
+                                retry_number,
+                                workspace.__class__.__name__,
+                                runtime_run.runtime_id,
+                                runtime_run.session_id,
+                                runtime_run.resource_factor,
+                            )
+                        out = self.evaluate_instance(instance, workspace)
+                        if runtime_runs:
+                            out.runtime_runs = runtime_runs
+                        logger.info("[child] done id=%s", instance.id)
+                        return instance, out
+                    except Exception as e:
+                        last_error = e
+                        retry_count += 1
+                        exec_span.record_exception(e)
+
+                        # Log structured runtime allocation/init failures so we can trace instance -> runtime/pod
+                        runtime_id = (
+                            getattr(workspace, "_runtime_id", None)
+                            if workspace
+                            else None
                         )
-                        # Create error output for final failure
-                        error_output = self._create_error_output(
-                            instance, last_error, max_retries
+                        session_id = (
+                            getattr(workspace, "session_id", None)
+                            if workspace
+                            else None
                         )
-                        if runtime_runs:
-                            error_output.runtime_runs = runtime_runs
-                        return instance, error_output
-                finally:
-                    # Ensure workspace cleanup happens regardless of success or failure
-                    if workspace is not None:
-                        try:
-                            self._capture_conversation_archive(workspace, instance)
-                        except Exception as archive_error:
+                        if isinstance(workspace, APIRemoteWorkspace) or (
+                            "Runtime not yet ready" in str(e)
+                        ):
                             logger.warning(
-                                "[child] Failed to capture conversation archive for %s: %s",
+                                "[child] runtime init failure instance=%s attempt=%d retry=%d runtime_id=%s session_id=%s error=%s",
                                 instance.id,
-                                archive_error,
-                            )
-                        try:
-                            workspace.__exit__(None, None, None)
-                            logger.debug(
-                                "[child] cleaned up workspace for id=%s", instance.id
+                                critic_attempt,
+                                retry_count,
+                                runtime_id,
+                                session_id,
+                                str(e),
                             )
-                        except Exception as cleanup_error:
+
+                        # TODO(#277): add an exception classifier to decide when to bump resources
+                        runtime_failure_count += 1
+                        logger.warning(
+                            f"[child] Instance {instance.id}: runtime_failure_count="
+                            f"{runtime_failure_count}"
+                        )
+
+                        if retry_count <= max_retries:
                             logger.warning(
-                                f"[child] Failed to cleanup workspace for {instance.id}: "
-                                f"{str(cleanup_error)[:50]}"
+                                f"[child] Instance {instance.id} failed "
+                                f"(attempt {retry_count}/{max_retries}): "
+                                f"{str(e)}"
+                            )
+                        else:
+                            logger.error(
+                                f"[child] Instance {instance.id} failed after "
+                                f"{max_retries} retries. Last error: {str(e)}",
+                                exc_info=True,
+                            )
+                            # Create error output for final failure
+                            error_output = self._create_error_output(
+                                instance, last_error, max_retries
                             )
-                    lmnr_span.end()
+                            if runtime_runs:
+                                error_output.runtime_runs = runtime_runs
+                            return instance, error_output
+                    finally:
+                        # Ensure workspace cleanup happens regardless of success or failure
+                        if workspace is not None:
+                            try:
+                                self._capture_conversation_archive(workspace, instance)
+                            except Exception as archive_error:
+                                logger.warning(
+                                    "[child] Failed to capture conversation archive for %s: %s",
+                                    instance.id,
+                                    archive_error,
+                                )
+                            try:
+                                # Use the context manager protocol for cleanup
+                                workspace.__exit__(None, None, None)
+                                logger.debug(
+                                    "[child] cleaned up workspace for id=%s",
+                                    instance.id,
+                                )
+                            except Exception as cleanup_error:
+                                logger.warning(
+                                    f"[child] Failed to cleanup workspace for {instance.id}: "
+                                    f"{str(cleanup_error)[:50]}"
+                                )
+                        exec_span.end()
+            finally:
+                eval_span.end()
 
             # This should never be reached, but added for type safety
             error_output = self._create_error_output(
diff --git a/benchmarks/utils/laminar.py b/benchmarks/utils/laminar.py
index 921418191..2ea41eb7c 100644
--- a/benchmarks/utils/laminar.py
+++ b/benchmarks/utils/laminar.py
@@ -103,51 +103,66 @@ def create_evaluation_datapoint(
         data: Any,
         metadata: dict[str, Any],
         index: int,
-        session_id: str | None = None,
-        trace_metadata: dict[str, Any] | None = None,
-    ) -> tuple[UUID | None, str | None]:
+    ) -> UUID | None:
         """
         Create a Laminar datapoint.
-        Creates a new span for the evaluation and returns the span context.
-        Session ID and trace metadata are set on the span if provided.
+
+        The datapoint is registered immediately (for UI visibility and progress
+        tracking) without a trace ID.  The child process will later start the
+        root "Evaluation" span and update the datapoint with the real trace ID,
+        so the timeline accurately reflects when work begins.
+
+        Returns the datapoint_id.
         """
 
         if eval_id is None:
-            return None, None
+            return None
 
         client = self._get_client()
         if client is None:
-            return None, None
+            return None
 
         try:
-            eval_span = Laminar.start_active_span(
-                "Evaluation",
-                span_type="EVALUATION",  # type: ignore
-            )
-            # Set session ID and metadata on the active span
-            if session_id:
-                Laminar.set_trace_session_id(session_id)
-            if trace_metadata:
-                Laminar.set_trace_metadata(trace_metadata)
-
-            lmnr_span_ctx = Laminar.serialize_span_context(eval_span)
-            eval_span.end()
-
             return client.evals.create_datapoint(
                 eval_id=eval_id,
                 data=data,
                 target=1,
                 metadata=metadata,
                 index=index,
-                trace_id=UUID(int=eval_span.get_span_context().trace_id),
-            ), lmnr_span_ctx
+            )
         except Exception as exc:
             logger.debug(
                 "Failed to create Laminar datapoint for eval %s: %s",
                 eval_id,
                 exc,
             )
-            return None, None
+            return None
+
+    def update_datapoint_trace_id(
+        self,
+        eval_id: UUID | None,
+        datapoint_id: UUID | None,
+        trace_id: UUID,
+    ) -> None:
+        """Link a datapoint to a trace after the span has been created."""
+
+        client = self._get_client()
+        if client is None or not eval_id or not datapoint_id:
+            return
+
+        try:
+            client.evals.update_datapoint(
+                eval_id=eval_id,
+                datapoint_id=datapoint_id,
+                scores={},
+                trace_id=trace_id,
+            )
+        except Exception as exc:  # pragma: no cover - defensive logging
+            logger.debug(
+                "Failed to update trace_id for datapoint %s: %s",
+                datapoint_id,
+                exc,
+            )
 
     def _update_evaluation_datapoint(
         self,
diff --git a/pyproject.toml b/pyproject.toml
index 2cb142648..330d901b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,7 @@ dependencies = [
     "python-json-logger>=3.3.0",
     "tenacity>=9.1.2",
     "websockets>=12",
-    "lmnr>=0.7.24",
+    "lmnr>=0.7.41",
     # TODO Remove the macOS exclusion once https://github.com/multi-swe-bench/multi-swe-bench/pull/93 is merged and released
     "multi-swe-bench>=1.1.1; sys_platform != 'darwin'",
     "swt-bench @ git+https://github.com/logic-star-ai/swt-bench.git@5fdcd446ff05e248ecfffc19d560a210699f71f8",
diff --git a/tests/test_workspace_cleanup.py b/tests/test_workspace_cleanup.py
index 1af4e764a..24632dcea 100644
--- a/tests/test_workspace_cleanup.py
+++ b/tests/test_workspace_cleanup.py
@@ -60,7 +60,7 @@ def evaluate_instance(self, instance, workspace):
 
     evaluator = TestEvaluation(metadata=metadata, num_workers=1)
     result_instance, result_output = evaluator._process_one_mp(
-        test_instance, None, critic_attempt=1
+        test_instance, critic_attempt=1
     )
 
     mock_workspace.__exit__.assert_called_once_with(None, None, None)
@@ -111,7 +111,7 @@ def evaluate_instance(self, instance, workspace):
 
     evaluator = TestEvaluation(metadata=metadata, num_workers=1)
     result_instance, result_output = evaluator._process_one_mp(
-        test_instance, None, critic_attempt=1
+        test_instance, critic_attempt=1
     )
 
     mock_workspace.__exit__.assert_called_once_with(None, None, None)
@@ -171,7 +171,7 @@ def evaluate_instance(self, instance, workspace):
 
     evaluator = TestEvaluation(metadata=metadata, num_workers=1)
     result_instance, result_output = evaluator._process_one_mp(
-        test_instance, None, critic_attempt=1
+        test_instance, critic_attempt=1
     )
 
     mock_workspace.__exit__.assert_called_once_with(None, None, None)
@@ -241,7 +241,7 @@ def evaluate_instance(self, instance, workspace):
 
     evaluator = TestEvaluation(metadata=metadata, num_workers=1)
     result_instance, result_output = evaluator._process_one_mp(
-        test_instance, None, critic_attempt=1
+        test_instance, critic_attempt=1
     )
 
     assert len(workspaces_created) == 3

From 9441a3403787e2d47c67d2cc0c12cb2599c12871 Mon Sep 17 00:00:00 2001
From: Simon Rosenberg <simon@rosenberg.dev>
Date: Wed, 4 Mar 2026 15:46:13 -0300
Subject: [PATCH 2/2] Address PR review feedback: improve robustness, docs, and
 tests

- Fix span lifecycle safety: initialize eval_span/exec_span to None
  with guarded .end() calls in finally blocks
- Broaden exception handling for trace linking to catch all failures
- Clarify _execute_single_attempt return convention in docstring
- Add tests for datapoint trace linking and failure resilience
- Satisfy pyright's return-type check with an unreachable assertion
- Update uv.lock for lmnr>=0.7.41

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 benchmarks/utils/evaluation.py  | 378 ++++++++++++++++++--------------
 benchmarks/utils/laminar.py     |  19 +-
 tests/test_workspace_cleanup.py | 219 +++++++++++++++++-
 uv.lock                         |  62 +++---
 4 files changed, 474 insertions(+), 204 deletions(-)

diff --git a/benchmarks/utils/evaluation.py b/benchmarks/utils/evaluation.py
index 750736ece..e3a6f464e 100644
--- a/benchmarks/utils/evaluation.py
+++ b/benchmarks/utils/evaluation.py
@@ -571,6 +571,27 @@ def _calculate_resource_factor(self, runtime_failure_count: int) -> int:
         factor = self.metadata.base_resource_factor * (2**runtime_failure_count)
         return min(factor, self.metadata.max_resource_factor)
 
+    def _cleanup_workspace(
+        self, workspace: RemoteWorkspace, instance: EvalInstance
+    ) -> None:
+        """Clean up workspace resources and capture conversation archive."""
+        try:
+            self._capture_conversation_archive(workspace, instance)
+        except Exception as archive_error:
+            logger.warning(
+                "[child] Failed to capture conversation archive for %s: %s",
+                instance.id,
+                archive_error,
+            )
+        try:
+            workspace.__exit__(None, None, None)
+            logger.debug("[child] cleaned up workspace for id=%s", instance.id)
+        except Exception as cleanup_error:
+            logger.warning(
+                f"[child] Failed to cleanup workspace for {instance.id}: "
+                f"{str(cleanup_error)[:50]}"
+            )
+
     # --- Worker-side method (executed in child processes) ---------------------------
     def _process_one_mp(
         self,
@@ -599,184 +620,211 @@ def _process_one_mp(
         with redirect_stdout_stderr(log_file):
             logger.info("[child] start id=%s", instance.id)
 
-            # Create root "Evaluation" span in the child so the timeline
-            # reflects actual execution start, then update the datapoint
-            # with the span's trace_id.
-            eval_span = Laminar.start_active_span(
-                "Evaluation",
-                span_type="EVALUATION",  # type: ignore
-                session_id=lmnr_session_id,
-                metadata=lmnr_trace_metadata,
-            )
-            eval_span_ctx = Laminar.get_laminar_span_context(eval_span)
-
-            if lmnr_datapoint_id is not None and self.metadata.lmnr is not None:
-                trace_id = UUID(int=eval_span.get_span_context().trace_id)
-                LaminarService.get().initialize()
-                LaminarService.get().update_datapoint_trace_id(
-                    eval_id=self.metadata.lmnr.eval_id,
-                    datapoint_id=lmnr_datapoint_id,
-                    trace_id=trace_id,
-                )
-
-            retry_count = 0
-            runtime_failure_count = 0
-            last_error = None
-            max_retries = self.metadata.max_retries
-            runtime_runs: list[RemoteRuntimeAllocation] = []
-
+            # Two-phase datapoint linking:
+            # 1. Parent creates the datapoint immediately, with no trace ID
+            #    (for UI progress tracking).
+            # 2. Child starts eval_span when work begins (accurate timeline)
+            #    and links it via update_datapoint_trace_id (lmnr>=0.7.41).
+            #
+            # The span is not started in the parent with the datapoint: tasks
+            # queue before a worker picks them up, inflating span duration.
+            eval_span = None
             try:
-                while retry_count <= max_retries:
-                    workspace = None
-
-                    # Start Laminar execution span and inject context into os.environ so workspace can pick it up
-                    # Escape the serialized context to safely pass as a cli argument
-                    exec_span = Laminar.start_active_span(
-                        "Execution",
-                        span_type="EXECUTOR",  # type: ignore
-                        parent_span_context=eval_span_ctx,
-                    )
-                    exec_span_ctx = json.dumps(
-                        Laminar.serialize_span_context(exec_span)
+                eval_span = Laminar.start_active_span(
+                    "Evaluation",
+                    span_type="EVALUATION",  # type: ignore
+                    session_id=lmnr_session_id,
+                    metadata=lmnr_trace_metadata,
+                )
+                eval_span_ctx = Laminar.get_laminar_span_context(eval_span)
+
+                if lmnr_datapoint_id is not None and self.metadata.lmnr is not None:
+                    # OpenTelemetry trace_id is a 128-bit integer in span context
+                    trace_id = UUID(int=eval_span.get_span_context().trace_id)
+                    logger.info(
+                        "[child] Linking datapoint %s to trace %s for instance %s",
+                        lmnr_datapoint_id,
+                        trace_id,
+                        instance.id,
                     )
-                    os.environ["LMNR_SPAN_CONTEXT"] = exec_span_ctx or ""
-
                     try:
-                        # Calculate resource factor based on runtime failures
-                        resource_factor = self._calculate_resource_factor(
-                            runtime_failure_count
+                        # Re-initialize in child process — multiprocessing
+                        # isolation means the parent's SDK state is not shared.
+                        LaminarService.get().initialize()
+                        LaminarService.get().update_datapoint_trace_id(
+                            eval_id=self.metadata.lmnr.eval_id,
+                            datapoint_id=lmnr_datapoint_id,
+                            trace_id=trace_id,
                         )
-                        if runtime_failure_count > 0:
-                            logger.warning(
-                                f"[child] Instance {instance.id}: "
-                                f"attempt {retry_count + 1}/{max_retries + 1}, "
-                                f"runtime_failure_count={runtime_failure_count}, "
-                                f"resource_factor={resource_factor}"
-                            )
-
-                        workspace = self.prepare_workspace(
-                            instance,
-                            resource_factor=resource_factor,
-                            forward_env=LMNR_ENV_VARS,
+                    except Exception as exc:
+                        logger.error(
+                            "[child] Failed to link datapoint %s to trace for instance %s: %s",
+                            lmnr_datapoint_id,
+                            instance.id,
+                            exc,
+                            exc_info=True,
                         )
 
-                        # Record runtime/pod mapping only for remote runtimes
-                        if isinstance(workspace, APIRemoteWorkspace):
-                            retry_number = retry_count + 1  # 1-indexed for readability
-                            runtime_run = RemoteRuntimeAllocation(
-                                runtime_id=getattr(workspace, "_runtime_id", None),
-                                session_id=getattr(workspace, "session_id", None),
-                                runtime_url=getattr(workspace, "_runtime_url", None),
-                                resource_factor=resource_factor,
-                                critic_attempt=critic_attempt,
-                                retry=retry_number,
-                                started_at=datetime.now(timezone.utc),
-                            )
-                            runtime_runs.append(runtime_run)
-                            logger.info(
-                                "[child] runtime allocated instance=%s attempt=%d retry=%d workspace=%s runtime_id=%s session_id=%s resource_factor=%s",
-                                instance.id,
-                                critic_attempt,
-                                retry_number,
-                                workspace.__class__.__name__,
-                                runtime_run.runtime_id,
-                                runtime_run.session_id,
-                                runtime_run.resource_factor,
-                            )
-                        out = self.evaluate_instance(instance, workspace)
-                        if runtime_runs:
-                            out.runtime_runs = runtime_runs
-                        logger.info("[child] done id=%s", instance.id)
+                retry_count = 0
+                runtime_failure_count = 0
+                max_retries = self.metadata.max_retries
+                runtime_runs: list[RemoteRuntimeAllocation] = []
+
+                # max_retries is the number of *additional* attempts after the
+                # first, so total attempts = max_retries + 1 (retry_count 0..N).
+                while retry_count <= max_retries:
+                    out = self._execute_single_attempt(
+                        instance=instance,
+                        eval_span_ctx=eval_span_ctx,
+                        critic_attempt=critic_attempt,
+                        resource_factor=self._calculate_resource_factor(
+                            runtime_failure_count
+                        ),
+                        retry_count=retry_count,
+                        max_retries=max_retries,
+                        runtime_failure_count=runtime_failure_count,
+                        runtime_runs=runtime_runs,
+                    )
+                    if out is not None:
                         return instance, out
-                    except Exception as e:
-                        last_error = e
-                        retry_count += 1
-                        exec_span.record_exception(e)
-
-                        # Log structured runtime allocation/init failures so we can trace instance -> runtime/pod
-                        runtime_id = (
-                            getattr(workspace, "_runtime_id", None)
-                            if workspace
-                            else None
-                        )
-                        session_id = (
-                            getattr(workspace, "session_id", None)
-                            if workspace
-                            else None
-                        )
-                        if isinstance(workspace, APIRemoteWorkspace) or (
-                            "Runtime not yet ready" in str(e)
-                        ):
-                            logger.warning(
-                                "[child] runtime init failure instance=%s attempt=%d retry=%d runtime_id=%s session_id=%s error=%s",
-                                instance.id,
-                                critic_attempt,
-                                retry_count,
-                                runtime_id,
-                                session_id,
-                                str(e),
-                            )
 
-                        # TODO(#277): add an exception classifier to decide when to bump resources
-                        runtime_failure_count += 1
-                        logger.warning(
-                            f"[child] Instance {instance.id}: runtime_failure_count="
-                            f"{runtime_failure_count}"
-                        )
+                    # _execute_single_attempt returns None on non-final failure
+                    retry_count += 1
+                    runtime_failure_count += 1
 
-                        if retry_count <= max_retries:
-                            logger.warning(
-                                f"[child] Instance {instance.id} failed "
-                                f"(attempt {retry_count}/{max_retries}): "
-                                f"{str(e)}"
-                            )
-                        else:
-                            logger.error(
-                                f"[child] Instance {instance.id} failed after "
-                                f"{max_retries} retries. Last error: {str(e)}",
-                                exc_info=True,
-                            )
-                            # Create error output for final failure
-                            error_output = self._create_error_output(
-                                instance, last_error, max_retries
-                            )
-                            if runtime_runs:
-                                error_output.runtime_runs = runtime_runs
-                            return instance, error_output
-                    finally:
-                        # Ensure workspace cleanup happens regardless of success or failure
-                        if workspace is not None:
-                            try:
-                                self._capture_conversation_archive(workspace, instance)
-                            except Exception as archive_error:
-                                logger.warning(
-                                    "[child] Failed to capture conversation archive for %s: %s",
-                                    instance.id,
-                                    archive_error,
-                                )
-                            try:
-                                # Use the context manager protocol for cleanup
-                                workspace.__exit__(None, None, None)
-                                logger.debug(
-                                    "[child] cleaned up workspace for id=%s",
-                                    instance.id,
-                                )
-                            except Exception as cleanup_error:
-                                logger.warning(
-                                    f"[child] Failed to cleanup workspace for {instance.id}: "
-                                    f"{str(cleanup_error)[:50]}"
-                                )
-                        exec_span.end()
+                # Unreachable: _execute_single_attempt always returns EvalOutput
+                # on the final retry, but pyright can't prove the loop exits early.
+                raise AssertionError("unreachable")  # pragma: no cover
             finally:
-                eval_span.end()
+                if eval_span is not None:
+                    eval_span.end()
+
+    def _execute_single_attempt(
+        self,
+        instance: EvalInstance,
+        eval_span_ctx: Any,
+        critic_attempt: int,
+        resource_factor: int,
+        retry_count: int,
+        max_retries: int,
+        runtime_failure_count: int,
+        runtime_runs: list[RemoteRuntimeAllocation],
+    ) -> EvalOutput | None:
+        """Execute one attempt with proper span and workspace lifecycle.
+
+        Returns:
+            EvalOutput: on success, or on the *final* retry failure
+                (retry_count == max_retries) so the caller can report it.
+            None: on a non-final failure, signalling the caller should retry::
+
+                out = self._execute_single_attempt(...)
+                if out is not None:
+                    return instance, out   # done (success or final failure)
+                # else: bump counters and loop
+        """
+        workspace = None
+        exec_span = None
+        try:
+            # Serialize span context and inject via environment variable so workspace can pick it up
+            exec_span = Laminar.start_active_span(
+                "Execution",
+                span_type="EXECUTOR",  # type: ignore
+                parent_span_context=eval_span_ctx,
+            )
+            exec_span_ctx = json.dumps(Laminar.serialize_span_context(exec_span))
+            os.environ["LMNR_SPAN_CONTEXT"] = exec_span_ctx or ""
+
+            if runtime_failure_count > 0:
+                logger.warning(
+                    f"[child] Instance {instance.id}: "
+                    f"attempt {retry_count + 1}/{max_retries + 1}, "
+                    f"runtime_failure_count={runtime_failure_count}, "
+                    f"resource_factor={resource_factor}"
+                )
 
-            # This should never be reached, but added for type safety
-            error_output = self._create_error_output(
-                instance, Exception("Unexpected error: no attempts made"), max_retries
+            workspace = self.prepare_workspace(
+                instance,
+                resource_factor=resource_factor,
+                forward_env=LMNR_ENV_VARS,
             )
+
+            # Record runtime/pod mapping only for remote runtimes
+            if isinstance(workspace, APIRemoteWorkspace):
+                retry_number = retry_count + 1  # 1-indexed for readability
+                runtime_run = RemoteRuntimeAllocation(
+                    runtime_id=getattr(workspace, "_runtime_id", None),
+                    session_id=getattr(workspace, "session_id", None),
+                    runtime_url=getattr(workspace, "_runtime_url", None),
+                    resource_factor=resource_factor,
+                    critic_attempt=critic_attempt,
+                    retry=retry_number,
+                    started_at=datetime.now(timezone.utc),
+                )
+                runtime_runs.append(runtime_run)
+                logger.info(
+                    "[child] runtime allocated instance=%s attempt=%d retry=%d workspace=%s runtime_id=%s session_id=%s resource_factor=%s",
+                    instance.id,
+                    critic_attempt,
+                    retry_number,
+                    workspace.__class__.__name__,
+                    runtime_run.runtime_id,
+                    runtime_run.session_id,
+                    runtime_run.resource_factor,
+                )
+            out = self.evaluate_instance(instance, workspace)
             if runtime_runs:
-                error_output.runtime_runs = runtime_runs
-            return instance, error_output
+                out.runtime_runs = runtime_runs
+            logger.info("[child] done id=%s", instance.id)
+            return out
+        except Exception as e:
+            if exec_span is not None:
+                exec_span.record_exception(e)
+
+            # Log structured runtime allocation/init failures so we can trace instance -> runtime/pod
+            runtime_id = getattr(workspace, "_runtime_id", None) if workspace else None
+            session_id = getattr(workspace, "session_id", None) if workspace else None
+            if isinstance(workspace, APIRemoteWorkspace) or (
+                "Runtime not yet ready" in str(e)
+            ):
+                logger.warning(
+                    "[child] runtime init failure instance=%s attempt=%d retry=%d runtime_id=%s session_id=%s error=%s",
+                    instance.id,
+                    critic_attempt,
+                    retry_count + 1,
+                    runtime_id,
+                    session_id,
+                    str(e),
+                )
+
+            # TODO(#277): add an exception classifier to decide when to bump resources
+            logger.warning(
+                f"[child] Instance {instance.id}: runtime_failure_count="
+                f"{runtime_failure_count + 1}"
+            )
+
+            if retry_count < max_retries:
+                logger.warning(
+                    f"[child] Instance {instance.id} failed "
+                    f"(attempt {retry_count + 1}/{max_retries}): "
+                    f"{str(e)}"
+                )
+            else:
+                logger.error(
+                    f"[child] Instance {instance.id} failed after "
+                    f"{max_retries} retries. Last error: {str(e)}",
+                    exc_info=True,
+                )
+                # Create error output for final failure
+                error_output = self._create_error_output(instance, e, max_retries)
+                if runtime_runs:
+                    error_output.runtime_runs = runtime_runs
+                return error_output
+            return None
+        finally:
+            if workspace is not None:
+                self._cleanup_workspace(workspace, instance)
+            if exec_span is not None:
+                exec_span.end()
 
 
 # ---------- Multiprocessing logging helper ---------------------------------------
diff --git a/benchmarks/utils/laminar.py b/benchmarks/utils/laminar.py
index 2ea41eb7c..0fe9773a0 100644
--- a/benchmarks/utils/laminar.py
+++ b/benchmarks/utils/laminar.py
@@ -105,12 +105,15 @@ def create_evaluation_datapoint(
         index: int,
     ) -> UUID | None:
         """
-        Create a Laminar datapoint.
+        Create a Laminar datapoint without trace linkage.
 
-        The datapoint is registered immediately (for UI visibility and progress
-        tracking) without a trace ID.  The child process will later start the
-        root "Evaluation" span and update the datapoint with the real trace ID,
-        so the timeline accurately reflects when work begins.
+        The datapoint is created immediately for UI visibility, but the trace_id
+        is set later (via update_datapoint_trace_id) when the child process
+        actually starts the evaluation span. This ensures accurate timeline
+        measurement that excludes queue wait time.
+
+        Note: session_id and trace_metadata are intentionally not set here;
+        they are applied when the child process creates the root eval span.
 
         Returns the datapoint_id.
         """
@@ -158,9 +161,11 @@ def update_datapoint_trace_id(
                 trace_id=trace_id,
             )
         except Exception as exc:  # pragma: no cover - defensive logging
-            logger.debug(
-                "Failed to update trace_id for datapoint %s: %s",
+            logger.warning(
+                "Failed to link datapoint %s to trace %s for eval %s: %s",
                 datapoint_id,
+                trace_id,
+                eval_id,
                 exc,
             )
 
diff --git a/tests/test_workspace_cleanup.py b/tests/test_workspace_cleanup.py
index 24632dcea..c5d1f32f2 100644
--- a/tests/test_workspace_cleanup.py
+++ b/tests/test_workspace_cleanup.py
@@ -1,10 +1,12 @@
 """Tests for workspace cleanup functionality in the evaluation module."""
 
 from typing import List
-from unittest.mock import Mock
+from unittest.mock import Mock, patch
+from uuid import UUID
 
 import pytest
 
+from benchmarks.utils.laminar import LaminarEvalMetadata
 from benchmarks.utils.models import EvalInstance, EvalMetadata, EvalOutput
 from openhands.sdk import LLM
 from openhands.sdk.critic import PassCritic
@@ -253,5 +255,220 @@ def evaluate_instance(self, instance, workspace):
     assert result_output.error is None
 
 
+def test_datapoint_trace_id_linked_in_worker():
+    """Test that update_datapoint_trace_id is called when a datapoint_id is provided."""
+    from benchmarks.utils.evaluation import Evaluation
+
+    mock_workspace = Mock()
+    mock_workspace.__exit__ = Mock()
+
+    test_instance = EvalInstance(id="test_instance", data={"test": "data"})
+    test_output = EvalOutput(
+        instance_id="test_instance",
+        test_result={"success": True},
+        instruction="test instruction",
+        error=None,
+        history=[],
+        instance={"test": "data"},
+    )
+
+    eval_id = UUID("12345678-1234-1234-1234-123456789abc")
+    datapoint_id = UUID("abcdef01-abcd-abcd-abcd-abcdef012345")
+
+    llm = LLM(model="test-model")
+    metadata = EvalMetadata(
+        llm=llm,
+        dataset="test",
+        dataset_split="test",
+        max_iterations=10,
+        eval_output_dir="/tmp/test",
+        details={},
+        eval_limit=1,
+        max_attempts=1,
+        max_retries=0,
+        critic=PassCritic(),
+    )
+    metadata.lmnr = LaminarEvalMetadata(eval_id=eval_id)
+
+    class TestEvaluation(Evaluation):
+        def prepare_instances(self) -> List[EvalInstance]:
+            return [test_instance]
+
+        def prepare_workspace(
+            self,
+            instance: EvalInstance,
+            resource_factor: int = 1,
+            forward_env: list[str] | None = None,
+        ):
+            mock_workspace.forward_env = forward_env or []
+            mock_workspace.resource_factor = resource_factor
+            return mock_workspace
+
+        def evaluate_instance(self, instance, workspace):
+            return test_output
+
+    evaluator = TestEvaluation(metadata=metadata, num_workers=1)
+
+    with patch("benchmarks.utils.evaluation.LaminarService") as mock_lmnr_svc:
+        mock_service = Mock()
+        mock_lmnr_svc.get.return_value = mock_service
+
+        result_instance, result_output = evaluator._process_one_mp(
+            test_instance,
+            critic_attempt=1,
+            lmnr_datapoint_id=datapoint_id,
+        )
+
+    # Verify update_datapoint_trace_id was called with the correct eval_id and datapoint_id
+    mock_service.update_datapoint_trace_id.assert_called_once()
+    call_kwargs = mock_service.update_datapoint_trace_id.call_args
+    assert call_kwargs.kwargs["eval_id"] == eval_id
+    assert call_kwargs.kwargs["datapoint_id"] == datapoint_id
+    assert isinstance(call_kwargs.kwargs["trace_id"], UUID)
+    assert (
+        call_kwargs.kwargs["trace_id"].int != 0
+    )  # Verify it's not a zero/default UUID
+
+    assert result_instance.id == "test_instance"
+    assert result_output.error is None
+
+
+def test_datapoint_trace_id_not_linked_without_datapoint():
+    """Test that update_datapoint_trace_id is NOT called when no datapoint_id is provided."""
+    from benchmarks.utils.evaluation import Evaluation
+
+    mock_workspace = Mock()
+    mock_workspace.__exit__ = Mock()
+
+    test_instance = EvalInstance(id="test_instance", data={"test": "data"})
+    test_output = EvalOutput(
+        instance_id="test_instance",
+        test_result={"success": True},
+        instruction="test instruction",
+        error=None,
+        history=[],
+        instance={"test": "data"},
+    )
+
+    llm = LLM(model="test-model")
+    metadata = EvalMetadata(
+        llm=llm,
+        dataset="test",
+        dataset_split="test",
+        max_iterations=10,
+        eval_output_dir="/tmp/test",
+        details={},
+        eval_limit=1,
+        max_attempts=1,
+        max_retries=0,
+        critic=PassCritic(),
+    )
+
+    class TestEvaluation(Evaluation):
+        def prepare_instances(self) -> List[EvalInstance]:
+            return [test_instance]
+
+        def prepare_workspace(
+            self,
+            instance: EvalInstance,
+            resource_factor: int = 1,
+            forward_env: list[str] | None = None,
+        ):
+            mock_workspace.forward_env = forward_env or []
+            mock_workspace.resource_factor = resource_factor
+            return mock_workspace
+
+        def evaluate_instance(self, instance, workspace):
+            return test_output
+
+    evaluator = TestEvaluation(metadata=metadata, num_workers=1)
+
+    with patch("benchmarks.utils.evaluation.LaminarService") as mock_lmnr_svc:
+        mock_service = Mock()
+        mock_lmnr_svc.get.return_value = mock_service
+
+        result_instance, result_output = evaluator._process_one_mp(
+            test_instance,
+            critic_attempt=1,
+            # No lmnr_datapoint_id passed
+        )
+
+    mock_service.update_datapoint_trace_id.assert_not_called()
+    assert result_output.error is None
+
+
+def test_update_datapoint_trace_id_failure_does_not_break_eval():
+    """Test that a failure in update_datapoint_trace_id does not prevent evaluation."""
+    from benchmarks.utils.evaluation import Evaluation
+
+    mock_workspace = Mock()
+    mock_workspace.__exit__ = Mock()
+
+    test_instance = EvalInstance(id="test_instance", data={"test": "data"})
+    test_output = EvalOutput(
+        instance_id="test_instance",
+        test_result={"success": True},
+        instruction="test instruction",
+        error=None,
+        history=[],
+        instance={"test": "data"},
+    )
+
+    eval_id = UUID("12345678-1234-1234-1234-123456789abc")
+    datapoint_id = UUID("abcdef01-abcd-abcd-abcd-abcdef012345")
+
+    llm = LLM(model="test-model")
+    metadata = EvalMetadata(
+        llm=llm,
+        dataset="test",
+        dataset_split="test",
+        max_iterations=10,
+        eval_output_dir="/tmp/test",
+        details={},
+        eval_limit=1,
+        max_attempts=1,
+        max_retries=0,
+        critic=PassCritic(),
+    )
+    metadata.lmnr = LaminarEvalMetadata(eval_id=eval_id)
+
+    class TestEvaluation(Evaluation):
+        def prepare_instances(self) -> List[EvalInstance]:
+            return [test_instance]
+
+        def prepare_workspace(
+            self,
+            instance: EvalInstance,
+            resource_factor: int = 1,
+            forward_env: list[str] | None = None,
+        ):
+            mock_workspace.forward_env = forward_env or []
+            mock_workspace.resource_factor = resource_factor
+            return mock_workspace
+
+        def evaluate_instance(self, instance, workspace):
+            return test_output
+
+    evaluator = TestEvaluation(metadata=metadata, num_workers=1)
+
+    with patch("benchmarks.utils.evaluation.LaminarService") as mock_lmnr_svc:
+        mock_service = Mock()
+        mock_service.update_datapoint_trace_id.side_effect = RuntimeError(
+            "Network error"
+        )
+        mock_lmnr_svc.get.return_value = mock_service
+
+        # Should not raise despite update_datapoint_trace_id failure
+        result_instance, result_output = evaluator._process_one_mp(
+            test_instance,
+            critic_attempt=1,
+            lmnr_datapoint_id=datapoint_id,
+        )
+
+    # The evaluation should still succeed
+    assert result_instance.id == "test_instance"
+    assert result_output.error is None
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
diff --git a/uv.lock b/uv.lock
index db834cb47..301f593f5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1741,7 +1741,7 @@ wheels = [
 
 [[package]]
 name = "lmnr"
-version = "0.7.25"
+version = "0.7.44"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "grpcio", version = "1.67.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" },
@@ -1762,9 +1762,9 @@ dependencies = [
     { name = "tenacity" },
     { name = "tqdm" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/10/bd/a65219ca6f09199e35a14a55acb503e3ac896db15018d342076bd24401e1/lmnr-0.7.25.tar.gz", hash = "sha256:a3a0ba9a305243bbe97f2fcb8afc7d39d201dc11107b4633c257b64b838b2979", size = 203876, upload-time = "2025-12-18T17:31:24.507Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/45/8d/6077e76cc1801799c496cd31686c68a40ac87afc508de5725767c1bea51d/lmnr-0.7.44.tar.gz", hash = "sha256:001cdb87554afcc1afff72333fce820591a595b30962486437186590ceb1c20b", size = 239647, upload-time = "2026-02-27T18:41:02.638Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/45/35/1f22e3fea98416d58dddbdbc63e18ddcfb2b8f850b8ec065652a90d99666/lmnr-0.7.25-py3-none-any.whl", hash = "sha256:c0539d5f8c8e59a2d5d0ab04e498a82351d51fde8cf04ef8b312424e0be537ac", size = 266040, upload-time = "2025-12-18T17:31:22.986Z" },
+    { url = "https://files.pythonhosted.org/packages/be/0d/7b4cd20a70085fd0ff844197245b214aaaf8a0f13098f356be9e7d22fc4f/lmnr-0.7.44-py3-none-any.whl", hash = "sha256:3c6f7b444586d9aa3db0b0ff9f9249c4d3647e146c899523bfefad2ffd332760", size = 314771, upload-time = "2026-02-27T18:41:00.869Z" },
 ]
 
 [[package]]
@@ -2399,7 +2399,7 @@ requires-dist = [
     { name = "huggingface-hub" },
     { name = "jinja2" },
     { name = "litellm", specifier = ">=1.77.7.dev9" },
-    { name = "lmnr", specifier = ">=0.7.24" },
+    { name = "lmnr", specifier = ">=0.7.41" },
     { name = "modal", specifier = ">=1.1.4" },
     { name = "multi-swe-bench", marker = "sys_platform != 'darwin'", specifier = ">=1.1.1" },
     { name = "openhands-agent-server", editable = "vendor/software-agent-sdk/openhands-agent-server" },
@@ -2526,32 +2526,32 @@ requires-dist = [
 
 [[package]]
 name = "opentelemetry-api"
-version = "1.38.0"
+version = "1.39.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "importlib-metadata" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" },
 ]
 
 [[package]]
 name = "opentelemetry-exporter-otlp-proto-common"
-version = "1.38.0"
+version = "1.39.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "opentelemetry-proto" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e9/9d/22d241b66f7bbde88a3bfa6847a351d2c46b84de23e71222c6aae25c7050/opentelemetry_exporter_otlp_proto_common-1.39.1.tar.gz", hash = "sha256:763370d4737a59741c89a67b50f9e39271639ee4afc999dadfe768541c027464", size = 20409, upload-time = "2025-12-11T13:32:40.885Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/02/ffc3e143d89a27ac21fd557365b98bd0653b98de8a101151d5805b5d4c33/opentelemetry_exporter_otlp_proto_common-1.39.1-py3-none-any.whl", hash = "sha256:08f8a5862d64cc3435105686d0216c1365dc5701f86844a8cd56597d0c764fde", size = 18366, upload-time = "2025-12-11T13:32:20.2Z" },
 ]
 
 [[package]]
 name = "opentelemetry-exporter-otlp-proto-grpc"
-version = "1.38.0"
+version = "1.39.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "googleapis-common-protos" },
@@ -2563,14 +2563,14 @@ dependencies = [
     { name = "opentelemetry-sdk" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a2/c0/43222f5b97dc10812bc4f0abc5dc7cd0a2525a91b5151d26c9e2e958f52e/opentelemetry_exporter_otlp_proto_grpc-1.38.0.tar.gz", hash = "sha256:2473935e9eac71f401de6101d37d6f3f0f1831db92b953c7dcc912536158ebd6", size = 24676, upload-time = "2025-10-16T08:35:53.83Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/53/48/b329fed2c610c2c32c9366d9dc597202c9d1e58e631c137ba15248d8850f/opentelemetry_exporter_otlp_proto_grpc-1.39.1.tar.gz", hash = "sha256:772eb1c9287485d625e4dbe9c879898e5253fea111d9181140f51291b5fec3ad", size = 24650, upload-time = "2025-12-11T13:32:41.429Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/28/f0/bd831afbdba74ca2ce3982142a2fad707f8c487e8a3b6fef01f1d5945d1b/opentelemetry_exporter_otlp_proto_grpc-1.38.0-py3-none-any.whl", hash = "sha256:7c49fd9b4bd0dbe9ba13d91f764c2d20b0025649a6e4ac35792fb8d84d764bc7", size = 19695, upload-time = "2025-10-16T08:35:35.053Z" },
+    { url = "https://files.pythonhosted.org/packages/81/a3/cc9b66575bd6597b98b886a2067eea2693408d2d5f39dad9ab7fc264f5f3/opentelemetry_exporter_otlp_proto_grpc-1.39.1-py3-none-any.whl", hash = "sha256:fa1c136a05c7e9b4c09f739469cbdb927ea20b34088ab1d959a849b5cc589c18", size = 19766, upload-time = "2025-12-11T13:32:21.027Z" },
 ]
 
 [[package]]
 name = "opentelemetry-exporter-otlp-proto-http"
-version = "1.38.0"
+version = "1.39.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "googleapis-common-protos" },
@@ -2581,14 +2581,14 @@ dependencies = [
     { name = "requests" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size = 17282, upload-time = "2025-10-16T08:35:54.422Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/80/04/2a08fa9c0214ae38880df01e8bfae12b067ec0793446578575e5080d6545/opentelemetry_exporter_otlp_proto_http-1.39.1.tar.gz", hash = "sha256:31bdab9745c709ce90a49a0624c2bd445d31a28ba34275951a6a362d16a0b9cb", size = 17288, upload-time = "2025-12-11T13:32:42.029Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" },
+    { url = "https://files.pythonhosted.org/packages/95/f1/b27d3e2e003cd9a3592c43d099d2ed8d0a947c15281bf8463a256db0b46c/opentelemetry_exporter_otlp_proto_http-1.39.1-py3-none-any.whl", hash = "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985", size = 19641, upload-time = "2025-12-11T13:32:22.248Z" },
 ]
 
 [[package]]
 name = "opentelemetry-instrumentation"
-version = "0.59b0"
+version = "0.60b1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "opentelemetry-api" },
@@ -2596,62 +2596,62 @@ dependencies = [
     { name = "packaging" },
     { name = "wrapt" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/04/ed/9c65cd209407fd807fa05be03ee30f159bdac8d59e7ea16a8fe5a1601222/opentelemetry_instrumentation-0.59b0.tar.gz", hash = "sha256:6010f0faaacdaf7c4dff8aac84e226d23437b331dcda7e70367f6d73a7db1adc", size = 31544, upload-time = "2025-10-16T08:39:31.959Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/41/0f/7e6b713ac117c1f5e4e3300748af699b9902a2e5e34c9cf443dde25a01fa/opentelemetry_instrumentation-0.60b1.tar.gz", hash = "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a", size = 31706, upload-time = "2025-12-11T13:36:42.515Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/10/f5/7a40ff3f62bfe715dad2f633d7f1174ba1a7dd74254c15b2558b3401262a/opentelemetry_instrumentation-0.59b0-py3-none-any.whl", hash = "sha256:44082cc8fe56b0186e87ee8f7c17c327c4c2ce93bdbe86496e600985d74368ee", size = 33020, upload-time = "2025-10-16T08:38:31.463Z" },
+    { url = "https://files.pythonhosted.org/packages/77/d2/6788e83c5c86a2690101681aeef27eeb2a6bf22df52d3f263a22cee20915/opentelemetry_instrumentation-0.60b1-py3-none-any.whl", hash = "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", size = 33096, upload-time = "2025-12-11T13:35:33.067Z" },
 ]
 
 [[package]]
 name = "opentelemetry-instrumentation-threading"
-version = "0.59b0"
+version = "0.60b1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "opentelemetry-api" },
     { name = "opentelemetry-instrumentation" },
     { name = "wrapt" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/82/7a/84e97d8992808197006e607ae410c2219bdbbc23d1289ba0c244d3220741/opentelemetry_instrumentation_threading-0.59b0.tar.gz", hash = "sha256:ce5658730b697dcbc0e0d6d13643a69fd8aeb1b32fa8db3bade8ce114c7975f3", size = 8770, upload-time = "2025-10-16T08:40:03.587Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9b/0a/e36123ec4c0910a3936b92982545a53e9bca5b26a28df06883751a783f84/opentelemetry_instrumentation_threading-0.60b1.tar.gz", hash = "sha256:20b18a68abe5801fa9474336b7c27487d4af3e00b66f6a8734e4fdd75c8b0b43", size = 8768, upload-time = "2025-12-11T13:37:16.29Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b8/50/32d29076aaa1c91983cdd3ca8c6bb4d344830cd7d87a7c0fdc2d98c58509/opentelemetry_instrumentation_threading-0.59b0-py3-none-any.whl", hash = "sha256:76da2fc01fe1dccebff6581080cff9e42ac7b27cc61eb563f3c4435c727e8eca", size = 9313, upload-time = "2025-10-16T08:39:15.876Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/a3/448738b927bcc1843ace7d4ed55dd54441a71363075eeeee89c5944dd740/opentelemetry_instrumentation_threading-0.60b1-py3-none-any.whl", hash = "sha256:92a52a60fee5e32bc6aa8f5acd749b15691ad0bc4457a310f5736b76a6d9d1de", size = 9312, upload-time = "2025-12-11T13:36:28.434Z" },
 ]
 
 [[package]]
 name = "opentelemetry-proto"
-version = "1.38.0"
+version = "1.39.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "protobuf" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" },
+    { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" },
 ]
 
 [[package]]
 name = "opentelemetry-sdk"
-version = "1.38.0"
+version = "1.39.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "opentelemetry-api" },
     { name = "opentelemetry-semantic-conventions" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" },
 ]
 
 [[package]]
 name = "opentelemetry-semantic-conventions"
-version = "0.59b0"
+version = "0.60b1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "opentelemetry-api" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" },
 ]
 
 [[package]]