Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
750bdbe
Add complete benchmark platform plans for Nature Methods paper
trissim Dec 19, 2025
5febdae
chore: tighten benchmark platform plans
trissim Dec 20, 2025
5f1b5e9
Add research findings for benchmark platform datasets and pipelines
trissim Dec 23, 2025
c3532ba
Add BBBC microscope handlers for benchmark datasets
trissim Dec 23, 2025
08fe25d
Add TESTED BBBC microscope handlers for benchmark datasets
trissim Dec 23, 2025
2aef1af
refactor: microscope detection, metadata caching, and signal batching
trissim Dec 24, 2025
3bc7445
feat: benchmark platform for OpenHCS and tool comparison
trissim Dec 24, 2025
ece9962
feat(benchmark): Add CellProfiler → OpenHCS converter with LLM absorp…
trissim Dec 24, 2025
a259e8d
wip(gui): [EXPERIMENTAL] Geometry tracking for flash overlays
trissim Dec 24, 2025
7c75a70
fix(converter): LLM-inferred contract/category, remove LLM fallback
trissim Dec 24, 2025
5a3c3e7
feat(benchmark): Complete CellProfiler to OpenHCS converter with 88 a…
trissim Dec 24, 2025
fef610e
Fix CellProfiler parameter mapping and kwargs passing
trissim Dec 24, 2025
fd0bb49
Recategorize CellProfiler functions with correct variable_components …
trissim Dec 24, 2025
6d13d43
Fix CellProfiler function categorization with correct variable_compon…
trissim Dec 24, 2025
728edda
Fix UnmixColors categorization: channel_operation → image_operation
trissim Dec 24, 2025
8f2a836
Fix dimensional flexibility in absorbed functions
trissim Dec 24, 2025
fee0c33
Fix pipeline generator and parameter mappings
trissim Dec 24, 2025
191183b
Fix IdentifyTertiaryObjects parameter mapping
trissim Dec 24, 2025
78dc00a
docs: Add comprehensive CellProfiler refactor plan
trissim Dec 27, 2025
811b1cb
docs: Move CellProfiler refactor plan to plans folder
trissim Dec 27, 2025
17e1608
docs: Move CellProfiler refactor plan to root plans folder
trissim Dec 27, 2025
4169c07
Merge main into benchmark-platform
trissim Feb 16, 2026
ece0bb4
Add CellProfiler integration architecture design document
trissim Feb 16, 2026
94f6064
Add CellProfiler to OpenHCS architecture mapping document
trissim Feb 16, 2026
7fd0918
Add comprehensive 'Context for New Agents' section to CellProfiler de…
trissim Feb 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions benchmark/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Public API for the benchmark platform."""

from benchmark.contracts.dataset import DatasetSpec, AcquiredDataset
from benchmark.contracts.metric import MetricCollector
from benchmark.contracts.tool_adapter import (
BenchmarkResult,
ToolAdapter,
ToolAdapterError,
ToolExecutionError,
ToolNotInstalledError,
ToolVersionError,
)
from benchmark.datasets.registry import BBBC021_SINGLE_PLATE, get_dataset_spec, DATASET_REGISTRY
from benchmark.datasets.acquire import acquire_dataset, DatasetAcquisitionError
from benchmark.metrics.time import TimeMetric
from benchmark.metrics.memory import MemoryMetric
from benchmark.pipelines.registry import (
PipelineSpec,
NUCLEI_SEGMENTATION,
get_pipeline_spec,
PIPELINE_REGISTRY,
)
from benchmark.adapters.openhcs import OpenHCSAdapter
from benchmark.runner import run_benchmark

# Names re-exported as the package's public API. Each entry corresponds to a
# name imported at the top of this module; the inline comments group them by
# the subpackage they come from.
__all__ = [
# Contracts
"DatasetSpec",
"AcquiredDataset",
"MetricCollector",
"BenchmarkResult",
"ToolAdapter",
"ToolAdapterError",
"ToolExecutionError",
"ToolNotInstalledError",
"ToolVersionError",
# Datasets
"DatasetAcquisitionError",
"acquire_dataset",
"BBBC021_SINGLE_PLATE",
"DATASET_REGISTRY",
"get_dataset_spec",
# Pipelines
"PipelineSpec",
"NUCLEI_SEGMENTATION",
"PIPELINE_REGISTRY",
"get_pipeline_spec",
# Metrics
"TimeMetric",
"MemoryMetric",
# Adapters
"OpenHCSAdapter",
# Runner
"run_benchmark",
]
5 changes: 5 additions & 0 deletions benchmark/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Tool adapters."""

from benchmark.adapters.openhcs import OpenHCSAdapter

# Public API of the adapters subpackage: only the OpenHCS adapter is exported.
__all__ = ["OpenHCSAdapter"]
188 changes: 188 additions & 0 deletions benchmark/adapters/openhcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""OpenHCS tool adapter."""

from __future__ import annotations

import logging
from contextlib import ExitStack
from pathlib import Path
from typing import Any

import numpy as np
from tqdm import tqdm
from skimage import filters, morphology, measure

from benchmark.contracts.tool_adapter import (
BenchmarkResult,
ToolAdapter,
ToolExecutionError,
ToolNotInstalledError,
)
from benchmark.contracts.metric import MetricCollector

logger = logging.getLogger(__name__)


class OpenHCSAdapter(ToolAdapter):
    """Benchmark adapter that runs a minimal segmentation pipeline via OpenHCS.

    Images are discovered, loaded and saved through the OpenHCS ``FileManager``;
    the segmentation itself (blur, Otsu threshold, optional clean-up, labelling)
    is implemented with scikit-image in :meth:`_run_minimal_pipeline`.
    """

    name = "OpenHCS"

    def __init__(self):
        """Record the installed OpenHCS version on ``self.version``."""
        # NOTE(review): this import is unconditional, so constructing the
        # adapter raises ImportError when OpenHCS is missing, before
        # validate_installation() can report ToolNotInstalledError. Confirm
        # whether callers rely on that before changing it.
        import openhcs

        self.version = openhcs.__version__

    def validate_installation(self) -> None:
        """Check OpenHCS is importable.

        Raises:
            ToolNotInstalledError: if ``import openhcs`` fails.
        """
        try:
            import openhcs  # noqa: F401
        except ImportError as exc:
            raise ToolNotInstalledError(f"OpenHCS not installed: {exc}") from exc

    def _prepare_filemanager(self):
        """Ensure the storage registry is set up and return a FileManager on it."""
        from openhcs.io.filemanager import FileManager
        from openhcs.io.base import storage_registry, ensure_storage_registry

        ensure_storage_registry()
        return FileManager(storage_registry)

    def _load_microscope(self, filemanager, dataset_path: Path, microscope_type: str):
        """Create the microscope handler for the dataset.

        A falsy ``microscope_type`` falls back to ``"auto"`` with no restriction
        on the auto-detected types; otherwise detection is limited to the
        requested type.
        """
        from openhcs.microscopes import create_microscope_handler

        return create_microscope_handler(
            microscope_type=microscope_type or "auto",
            plate_folder=dataset_path,
            filemanager=filemanager,
            allowed_auto_types=[microscope_type] if microscope_type else None,
        )

    def _run_minimal_pipeline(self, image: np.ndarray, params: dict[str, Any]) -> np.ndarray:
        """Blur -> threshold -> label segmentation pipeline.

        Args:
            image: Input image array; converted to ``float32`` if needed.
            params: Pipeline parameters. ``threshold_method``,
                ``threshold_scope`` and ``declump_method`` are only validated
                (must be ``None`` or ``"Otsu"`` / ``"Global"`` / ``"Shape"``
                respectively); they do not alter processing. ``opening_radius``
                (default 0) enables a morphological opening, ``fill_holes``
                (default False) enables small-hole removal, and
                ``diameter_range`` is an optional ``(min, max)`` pair used to
                drop objects whose area falls outside the equivalent circle
                areas.

        Returns:
            Label image cast to ``uint16`` (0 is background).

        Raises:
            ToolExecutionError: if any validated parameter has an unsupported
                value.
        """
        method = params.get("threshold_method")
        if method not in (None, "Otsu"):
            raise ToolExecutionError(f"Unsupported threshold_method '{method}'")

        scope = params.get("threshold_scope")
        if scope not in (None, "Global"):
            raise ToolExecutionError(f"Unsupported threshold_scope '{scope}'")

        declump = params.get("declump_method")
        if declump not in (None, "Shape"):
            raise ToolExecutionError(f"Unsupported declump_method '{declump}'")

        # Lists are accepted alongside tuples so parameters that round-tripped
        # through JSON/YAML (which have no tuple type) are not rejected.
        diameter_range = params.get("diameter_range")
        if diameter_range is not None and (
            not isinstance(diameter_range, (tuple, list))
            or len(diameter_range) != 2
            or not all(isinstance(x, (int, float)) for x in diameter_range)
        ):
            raise ToolExecutionError("diameter_range must be a (min, max) tuple")

        # Convert to float for processing while preserving dynamic range
        if image.dtype != np.float32:
            image = image.astype(np.float32)

        # Gaussian blur
        blurred = filters.gaussian(image, sigma=1)

        # Threshold
        threshold_value = filters.threshold_otsu(blurred)
        mask = blurred > threshold_value

        # Optional morphological opening to denoise
        radius = params.get("opening_radius", 0)
        if radius and radius > 0:
            selem = morphology.disk(radius)
            mask = morphology.opening(mask, selem)

        # Fill small holes if requested
        if params.get("fill_holes", False):
            mask = morphology.remove_small_holes(mask)

        labels = measure.label(mask)

        # Apply size filtering derived from diameter_range if provided
        if diameter_range:
            min_d, max_d = diameter_range
            min_area = np.pi * (min_d / 2) ** 2
            max_area = np.pi * (max_d / 2) ** 2
            remove_ids = [
                prop.label
                for prop in measure.regionprops(labels)
                if prop.area < min_area or prop.area > max_area
            ]
            if remove_ids:
                # Background (label 0) is never in remove_ids, so the isin test
                # alone would mark the whole background as foreground. Restrict
                # the kept mask to pixels that already belong to an object.
                keep = (labels > 0) & np.isin(labels, remove_ids, invert=True)
                labels = measure.label(keep)

        # NOTE(review): label values above 65535 wrap silently in this cast.
        return labels.astype(np.uint16)

    def run(
        self,
        dataset_path: Path,
        pipeline_name: str,
        pipeline_params: dict[str, Any],
        metrics: list[Any],
        output_dir: Path,
    ) -> BenchmarkResult:
        """Execute OpenHCS pipeline with metrics.

        Args:
            dataset_path: Root folder of the dataset (plate folder).
            pipeline_name: Name recorded in the returned result.
            pipeline_params: Parameters for the pipeline; must contain an
                explicit ``microscope_type``. ``dataset_id`` is used for the
                result if present, otherwise ``dataset_path.name``.
            metrics: ``MetricCollector`` instances, entered as context managers
                around the processing loop.
            output_dir: Directory for the label images; created if missing.

        Returns:
            A ``BenchmarkResult`` with ``success=True``, the collected metric
            results and provenance (OpenHCS version, microscope type, image
            count).

        Raises:
            ToolExecutionError: for a missing/``"auto"`` microscope type, a
                metric that is not a ``MetricCollector``, failure to create the
                microscope handler, failure to list images, an empty image
                list, or unsupported pipeline parameters.
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        microscope_type = pipeline_params.get("microscope_type")
        if microscope_type in (None, "auto"):
            raise ToolExecutionError("microscope_type must be explicit (e.g., 'bbbc021'); auto-detect is not allowed.")

        # Validate metric collectors
        for metric in metrics:
            if not isinstance(metric, MetricCollector):
                raise ToolExecutionError(f"Metric {metric} does not extend MetricCollector")

        filemanager = self._prepare_filemanager()

        try:
            microscope_handler = self._load_microscope(filemanager, dataset_path, microscope_type)
        except Exception as exc:
            raise ToolExecutionError(f"Failed to create microscope handler: {exc}") from exc

        # Enumerate image files via FileManager (leveraging OpenHCS discovery)
        try:
            from openhcs.constants.constants import Backend
            image_paths = filemanager.list_image_files(dataset_path, Backend.DISK.value, recursive=True)
        except Exception as exc:
            raise ToolExecutionError(f"Failed to list dataset images: {exc}") from exc

        if not image_paths:
            raise ToolExecutionError(f"No image files found in dataset path: {dataset_path}")

        # ExitStack enters every metric collector and exits them all once the
        # processing loop finishes (or raises).
        with ExitStack() as stack:
            for metric in metrics:
                stack.enter_context(metric)

            for img_path in tqdm(image_paths, desc="OpenHCS pipeline", leave=False):
                image = filemanager.load(img_path, "disk", content_type="image")
                labels = self._run_minimal_pipeline(image, pipeline_params)

                # NOTE(review): images are listed recursively but outputs are
                # written flat by stem, so same-named files in different
                # subfolders would overwrite each other.
                output_path = output_dir / f"{Path(img_path).stem}_labels.tif"
                filemanager.save(labels, output_path, "disk")

        # Collect metrics after contexts have exited
        metric_results: dict[str, Any] = {
            metric.name: metric.get_result() for metric in metrics
        }

        return BenchmarkResult(
            tool_name=self.name,
            dataset_id=pipeline_params.get("dataset_id", dataset_path.name),
            pipeline_name=pipeline_name,
            metrics=metric_results,
            output_path=output_dir,
            success=True,
            error_message=None,
            provenance={
                "openhcs_version": getattr(self, "version", "unknown"),
                "microscope_type": microscope_handler.microscope_type,
                "image_count": len(image_paths),
            },
        )
80 changes: 80 additions & 0 deletions benchmark/cellprofiler_library/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
CellProfiler Library - Absorbed into OpenHCS

Maps CellProfiler module names to OpenHCS functions.
Functions are loaded dynamically from contracts.json to avoid import errors.
"""

from typing import Dict, Callable, Optional
from pathlib import Path
import json
import importlib

# Contract registry: CellProfiler module name -> contract metadata, read from
# the contracts.json file that sits next to this module. A missing file leaves
# the registry empty.
_CONTRACTS_PATH = Path(__file__).parent / "contracts.json"
_contracts: Dict = {}

if _CONTRACTS_PATH.exists():
    # Decode explicitly as UTF-8 so loading does not depend on the platform's
    # default locale encoding.
    _contracts = json.loads(_CONTRACTS_PATH.read_text(encoding="utf-8"))

# Cache of already-resolved callables, keyed by CellProfiler module name.
_function_cache: Dict[str, Callable] = {}


def get_function(module_name: str) -> Optional[Callable]:
    """Resolve a CellProfiler module name to its OpenHCS callable.

    The contract entry for ``module_name`` supplies the function name. Several
    candidate file stems under the ``functions`` subpackage are tried in order,
    and the first module exposing that function wins; the result is cached.

    Returns:
        The callable, or ``None`` when the module name has no contract or no
        candidate module provides the function.
    """
    try:
        return _function_cache[module_name]
    except KeyError:
        pass

    if module_name not in _contracts:
        return None

    target = _contracts[module_name]["function_name"]
    pieces = target.split('_')

    # Candidate stems, in lookup order:
    #   - function name without underscores (gray_to_color_rgb -> graytocolorrgb)
    #   - lower-cased module name without underscores (GrayToColorRgb -> graytocolorrgb)
    #   - function name minus its last word (gray_to_color_rgb -> graytocolor)
    candidates = [
        target.replace('_', ''),
        module_name.lower().replace('_', ''),
    ]
    if len(pieces) > 2:
        candidates.append(''.join(pieces[:-1]))

    for stem in candidates:
        try:
            loaded = importlib.import_module(f".functions.{stem}", package=__package__)
            resolved = getattr(loaded, target, None)
        except (ImportError, AttributeError):
            # This candidate is unusable; fall through to the next one.
            continue
        if resolved is not None:
            _function_cache[module_name] = resolved
            return resolved

    return None


def get_contract(module_name: str) -> Optional[Dict]:
    """Return the contract entry for a CellProfiler module, or ``None`` if absent."""
    if module_name in _contracts:
        return _contracts[module_name]
    return None


def list_modules() -> list:
    """Return the CellProfiler module names present in the contract registry."""
    return list(_contracts)


# Public API: the lookup helpers defined in this module. The registry and
# cache variables are private.
__all__ = [
"get_function",
"get_contract",
"list_modules",
]
Loading
Loading