From a61581ab864b0b3781d19675504b112676ea511b Mon Sep 17 00:00:00 2001
From: Andrew Sazonov
Date: Wed, 11 Feb 2026 13:23:03 +0100
Subject: [PATCH] Extend initial scipp-analysis integration test suite for DREAM (#124)

* Extend DREAM scipp-analysis integration tests
* Fetch CIF uncached and update DREAM data checks
* Refine DREAM integration tests for robustness
* Enable DREAM reduced-data fitting in tests
* Update DREAM integration tests for API changes
* Refactor DREAM tests; drop legacy scipp-cif
* Round 2theta range to 5dp in CIF output
* Update data index SHA256 hash
* Relax version check to support dev builds
* Add essdiffraction dependency
* Prevent zero SUs; default intensity SU to 1.0
* Update tests/integration/scipp-analysis/dream/conftest.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Relax point ID check; allow conftest asserts
* Inline sample model and consolidate test fixtures
* Replace assert_almost_equal with pytest.approx

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 .github/workflows/coverage.yaml              |   2 +-
 pyproject.toml                               |  12 +-
 .../analysis/calculators/cryspy.py           |   4 +-
 .../experiments/categories/data/bragg_pd.py  |  19 +-
 src/easydiffraction/utils/utils.py           |   2 +-
 .../scipp-analysis/dream/conftest.py         |  60 +++++
 .../dream/test_analyze_reduced_data.py       | 213 ++++++++++++++++++
 .../dream/test_package_import.py             |  71 ++++++
 .../dream/test_read_reduced_data.py          |  33 +++
 .../scipp-analysis/dream/test_scipp-cif.py   |  74 ------
 .../dream/test_validate_meta_data.py         |  78 +++++++
 .../dream/test_validate_physical_data.py     |  94 ++++++++
 12 files changed, 580 insertions(+), 82 deletions(-)
 create mode 100644 tests/integration/scipp-analysis/dream/conftest.py
 create mode 100644 tests/integration/scipp-analysis/dream/test_analyze_reduced_data.py
 create mode 100644 tests/integration/scipp-analysis/dream/test_package_import.py
 create mode 100644 tests/integration/scipp-analysis/dream/test_read_reduced_data.py
 delete mode 100644 tests/integration/scipp-analysis/dream/test_scipp-cif.py
 create mode 100644 tests/integration/scipp-analysis/dream/test_validate_meta_data.py
 create mode 100644 tests/integration/scipp-analysis/dream/test_validate_physical_data.py

diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml
index cd212177..96ae3386 100644
--- a/.github/workflows/coverage.yaml
+++ b/.github/workflows/coverage.yaml
@@ -86,7 +86,7 @@ jobs:
           verbose: true
           token: ${{ secrets.CODECOV_TOKEN }}
 
-  # Job 2: Run integration tests with coverage and upload to Codecov
+  # Job 3: Run integration tests with coverage and upload to Codecov
   integration-tests-coverage:
     runs-on: ubuntu-latest
 
diff --git a/pyproject.toml b/pyproject.toml
index f152413c..fb0118e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ classifiers = [
 ]
 requires-python = '>=3.11,<3.14'
 dependencies = [
+    'essdiffraction',  # ESS Diffraction library
     'numpy',  # Numerical computing library
     'colorama',  # Color terminal output
     'tabulate',  # Pretty-print tabular data for terminal output
@@ -190,7 +191,13 @@ fail_under = 65  # Temporarily reduce to allow gradual improvement
 
 [tool.ruff]
 # Temporarily exclude some directories until we have improved the code quality there
-exclude = ['tests', 'tmp']
+#exclude = ['tests', 'tmp']
+exclude = [
+    'tmp',
+    'tests/unit',
+    'tests/integration/fitting',
+    'tests/integration/scipp-analysis/tmp',
+]
 indent-width = 4
 line-length = 99
 # Enable new rules that are not yet stable, like DOC
@@ -264,7 +271,8 @@ ban-relative-imports = 'all'
 force-single-line = true
 
 [tool.ruff.lint.per-file-ignores]
-'*test_*.py' = ['S101'] # allow asserts in test files
+'*test_*.py' = ['S101']   # allow asserts in test files
+'conftest.py' = ['S101']  # allow asserts in test files
 # Vendored jupyter_dark_detect: keep as-is from upstream for easy updates
 # https://github.com/OpenMined/jupyter-dark-detect/tree/main/jupyter_dark_detect
 'src/easydiffraction/utils/_vendored/jupyter_dark_detect/*' = [
diff --git a/src/easydiffraction/analysis/calculators/cryspy.py b/src/easydiffraction/analysis/calculators/cryspy.py
index a24fda7f..18e7858d 100644
--- a/src/easydiffraction/analysis/calculators/cryspy.py
+++ b/src/easydiffraction/analysis/calculators/cryspy.py
@@ -361,8 +361,8 @@ def _convert_experiment_to_cryspy_cif(
         cif_lines.append(f'{engine_key_name} {attr_obj.value}')
 
     x_data = experiment.data.x
-    twotheta_min = float(x_data.min())
-    twotheta_max = float(x_data.max())
+    twotheta_min = f'{np.round(x_data.min(), 5):.5f}'  # float(x_data.min())
+    twotheta_max = f'{np.round(x_data.max(), 5):.5f}'  # float(x_data.max())
     cif_lines.append('')
     if expt_type.beam_mode.value == BeamModeEnum.CONSTANT_WAVELENGTH:
         cif_lines.append(f'_range_2theta_min {twotheta_min}')
diff --git a/src/easydiffraction/experiments/categories/data/bragg_pd.py b/src/easydiffraction/experiments/categories/data/bragg_pd.py
index 47bdf1cb..da92da30 100644
--- a/src/easydiffraction/experiments/categories/data/bragg_pd.py
+++ b/src/easydiffraction/experiments/categories/data/bragg_pd.py
@@ -76,7 +76,7 @@ def __init__(self, **kwargs):
             description='Standard uncertainty of the measured intensity at this data point.',
             value_spec=AttributeSpec(
                 type_=DataTypes.NUMERIC,
-                default=0.0,
+                default=1.0,
                 content_validator=RangeValidator(ge=0),
             ),
             cif_handler=CifHandler(
@@ -321,7 +321,22 @@ def meas(self) -> np.ndarray:
 
     @property
     def meas_su(self) -> np.ndarray:
-        return np.fromiter((p.intensity_meas_su.value for p in self._calc_items), dtype=float)
+        # TODO: The following is a temporary workaround to handle zero
+        # or near-zero uncertainties in the data, when data is loaded
+        # from CIF files. This is necessary because zero uncertainties
+        # cause fitting algorithms to fail.
+        # The current implementation is inefficient.
+        # In the future, we should extend the functionality of
+        # the NumericDescriptor to automatically replace the value
+        # outside of the valid range (`content_validator`) with a
+        # default value (`default`), when the value is set.
+        # BraggPdExperiment._load_ascii_data_to_experiment() handles
+        # this for ASCII data, but we also need to handle CIF data and
+        # come up with a consistent approach for both data sources.
+        original = np.fromiter((p.intensity_meas_su.value for p in self._calc_items), dtype=float)
+        # Replace values smaller than 0.0001 with 1.0
+        modified = np.where(original < 0.0001, 1.0, original)
+        return modified
 
     @property
     def calc(self) -> np.ndarray:
diff --git a/src/easydiffraction/utils/utils.py b/src/easydiffraction/utils/utils.py
index 13b90b16..2b6f9c50 100644
--- a/src/easydiffraction/utils/utils.py
+++ b/src/easydiffraction/utils/utils.py
@@ -74,7 +74,7 @@ def _fetch_data_index() -> dict:
     _validate_url(index_url)
 
     # macOS: sha256sum index.json
-    index_hash = 'sha256:e78f5dd2f229ea83bfeb606502da602fc0b07136889877d3ab601694625dd3d7'
+    index_hash = 'sha256:9aceaf51d298992058c80903283c9a83543329a063692d49b7aaee1156e76884'
     destination_dirname = 'easydiffraction'
     destination_fname = 'data-index.json'
     cache_dir = pooch.os_cache(destination_dirname)
diff --git a/tests/integration/scipp-analysis/dream/conftest.py b/tests/integration/scipp-analysis/dream/conftest.py
new file mode 100644
index 00000000..56aabf1f
--- /dev/null
+++ b/tests/integration/scipp-analysis/dream/conftest.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2026 DMSC
+"""Shared fixtures for DREAM scipp-analysis integration tests.
+
+This module provides pytest fixtures for downloading and parsing
+reduced diffraction data from the DREAM instrument in CIF format.
+"""
+
+from pathlib import Path
+
+import gemmi
+import pytest
+from pooch import retrieve
+
+# Remote CIF file URL (regenerated nightly by scipp reduction pipeline)
+CIF_URL = 'https://pub-6c25ef91903d4301a3338bd53b370098.r2.dev/dream_reduced.cif'
+
+# Expected datablock name in the CIF file
+DATABLOCK_NAME = 'reduced_tof'
+
+
+@pytest.fixture(scope='module')
+def cif_path(
+    tmp_path_factory: pytest.TempPathFactory,
+) -> str:
+    """Download the CIF file fresh for each test module and return its path.
+
+    Uses tmp_path_factory to avoid pooch caching, ensuring the latest
+    version of the nightly-regenerated CIF file is always used.
+    """
+    tmp_dir = tmp_path_factory.mktemp('dream_data')
+    return retrieve(url=CIF_URL, known_hash=None, path=tmp_dir)
+
+
+@pytest.fixture(scope='module')
+def cif_content(
+    cif_path: str,
+) -> str:
+    """Read the CIF file content as text."""
+    return Path(cif_path).read_text()
+
+
+@pytest.fixture(scope='module')
+def cif_document(
+    cif_path: str,
+) -> gemmi.cif.Document:
+    """Read the CIF file with gemmi and return the document."""
+    return gemmi.cif.read(cif_path)
+
+
+@pytest.fixture(scope='module')
+def cif_block(
+    cif_document: gemmi.cif.Document,
+) -> gemmi.cif.Block:
+    """Return the 'reduced_tof' data block from the CIF document."""
+    block = cif_document.find_block(DATABLOCK_NAME)
+    assert block is not None, (
+        f'Expected CIF datablock {DATABLOCK_NAME!r} was not found in the document.'
+    )
+    return block
diff --git a/tests/integration/scipp-analysis/dream/test_analyze_reduced_data.py b/tests/integration/scipp-analysis/dream/test_analyze_reduced_data.py
new file mode 100644
index 00000000..eb528ff5
--- /dev/null
+++ b/tests/integration/scipp-analysis/dream/test_analyze_reduced_data.py
@@ -0,0 +1,213 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2026 DMSC
+"""Tests for analyzing reduced diffraction data using easydiffraction.
+
+These tests verify the complete workflow:
+1. Define project
+2. Add manually defined sample model
+3. Modify experiment CIF file
+4. Add experiment from modified CIF file
+5. Modify default experiment configuration
+6. Select parameters to be fitted
+7. Do fitting
+"""
+
+from pathlib import Path
+
+import pytest
+
+import easydiffraction as ed
+
+# CIF experiment type tags required by easydiffraction to identify
+# the experiment configuration (powder TOF neutron diffraction)
+EXPT_TYPE_TAGS = {
+    '_expt_type.sample_form': 'powder',
+    '_expt_type.beam_mode': 'time-of-flight',
+    '_expt_type.radiation_probe': 'neutron',
+    '_expt_type.scattering_type': 'bragg',
+}
+
+
+@pytest.fixture(scope='module')
+def prepared_cif_path(
+    cif_path: str,
+    tmp_path_factory: pytest.TempPathFactory,
+) -> str:
+    """Prepare CIF file with experiment type tags for
+    easydiffraction.
+    """
+    with Path(cif_path).open() as f:
+        content = f.read()
+
+    # Add experiment type tags if missing
+    for tag, value in EXPT_TYPE_TAGS.items():
+        if tag not in content:
+            content += f'\n{tag} {value}'
+
+    # Write to temp file
+    tmp_dir = tmp_path_factory.mktemp('dream_data')
+    prepared_path = tmp_dir / 'dream_reduced_prepared.cif'
+    prepared_path.write_text(content)
+
+    return str(prepared_path)
+
+
+@pytest.fixture(scope='module')
+def project_with_data(
+    prepared_cif_path: str,
+) -> ed.Project:
+    """Create project with sample model, experiment data, and
+    configuration.
+
+    1. Define project
+    2. Add manually defined sample model
+    3. Modify experiment CIF file
+    4. Add experiment from modified CIF file
+    5. Modify default experiment configuration
+    """
+    # Step 1: Define Project
+    project = ed.Project()
+
+    # Step 2: Define Sample Model manually
+    project.sample_models.add(name='si')
+    sample_model = project.sample_models['si']
+
+    sample_model.space_group.name_h_m = 'F d -3 m'
+    sample_model.space_group.it_coordinate_system_code = '1'
+
+    sample_model.cell.length_a = 5.43146
+
+    sample_model.atom_sites.add(
+        label='Si',
+        type_symbol='Si',
+        fract_x=0.125,
+        fract_y=0.125,
+        fract_z=0.125,
+        wyckoff_letter='c',
+        b_iso=1.1,
+    )
+
+    # Step 4: Add experiment from modified CIF file
+    project.experiments.add(cif_path=prepared_cif_path)
+    experiment = project.experiments['reduced_tof']
+
+    # Step 5: Configure experiment
+    # Link phase
+    experiment.linked_phases.add(id='si', scale=0.8)
+
+    # Instrument setup
+    experiment.instrument.setup_twotheta_bank = 90.0
+    experiment.instrument.calib_d_to_tof_linear = 18630.0
+
+    # Peak profile parameters
+    experiment.peak.broad_gauss_sigma_0 = 48500.0
+    experiment.peak.broad_gauss_sigma_1 = 3000.0
+    experiment.peak.broad_gauss_sigma_2 = 0.0
+    experiment.peak.broad_mix_beta_0 = 0.05
+    experiment.peak.broad_mix_beta_1 = 0.0
+    experiment.peak.asym_alpha_0 = 0.0
+    experiment.peak.asym_alpha_1 = 0.26
+
+    # Excluded regions
+    experiment.excluded_regions.add(id='1', start=0, end=10000)
+    experiment.excluded_regions.add(id='2', start=70000, end=200000)
+
+    # Background points
+    background_points = [
+        ('2', 10000, 0.01),
+        ('3', 14000, 0.2),
+        ('4', 21000, 0.7),
+        ('5', 27500, 0.6),
+        ('6', 40000, 0.3),
+        ('7', 50000, 0.6),
+        ('8', 61000, 0.7),
+        ('9', 70000, 0.6),
+    ]
+    for id_, x, y in background_points:
+        experiment.background.add(id=id_, x=x, y=y)
+
+    return project
+
+
+@pytest.fixture(scope='module')
+def fitted_project(
+    project_with_data: ed.Project,
+) -> ed.Project:
+    """Perform fit and return project with results.
+
+    6. Select parameters to be fitted
+    7. Do fitting
+    """
+    project = project_with_data
+    sample_model = project.sample_models['si']
+    experiment = project.experiments['reduced_tof']
+
+    # Step 6: Select parameters to be fitted
+    # Set free parameters for sample model
+    sample_model.atom_sites['Si'].b_iso.free = True
+
+    # Set free parameters for experiment
+    experiment.linked_phases['si'].scale.free = True
+    experiment.instrument.calib_d_to_tof_linear.free = True
+
+    experiment.peak.broad_gauss_sigma_0.free = True
+    experiment.peak.broad_gauss_sigma_1.free = True
+    experiment.peak.broad_mix_beta_0.free = True
+
+    # Set free parameters for background
+    for point in experiment.background:
+        point.y.free = True
+
+    # Step 7: Do fitting
+    project.analysis.fit()
+
+    return project
+
+
+# Test: Data Loading
+
+
+def test_analyze_reduced_data__load_cif(
+    project_with_data: ed.Project,
+) -> None:
+    """Verify CIF data loads into project correctly."""
+    assert 'reduced_tof' in project_with_data.experiments.names
+
+
+def test_analyze_reduced_data__data_size(
+    project_with_data: ed.Project,
+) -> None:
+    """Verify loaded data has expected size."""
+    experiment = project_with_data.experiments['reduced_tof']
+    # Data should have substantial number of points
+    assert experiment.data.x.size > 100
+
+
+# Test: Configuration
+
+
+def test_analyze_reduced_data__phase_linked(
+    project_with_data: ed.Project,
+) -> None:
+    """Verify phase is correctly linked to experiment."""
+    experiment = project_with_data.experiments['reduced_tof']
+    assert 'si' in experiment.linked_phases.names
+
+
+def test_analyze_reduced_data__background_set(
+    project_with_data: ed.Project,
+) -> None:
+    """Verify background points are configured."""
+    experiment = project_with_data.experiments['reduced_tof']
+    assert len(experiment.background.names) >= 5
+
+
+# Test: Fitting
+
+
+def test_analyze_reduced_data__fit_quality(
+    fitted_project: ed.Project,
+) -> None:
+    """Verify fit quality is reasonable (chi-square value)."""
+    chi_square = fitted_project.analysis.fit_results.reduced_chi_square
+    assert chi_square == pytest.approx(16.0, abs=0.1)
diff --git a/tests/integration/scipp-analysis/dream/test_package_import.py b/tests/integration/scipp-analysis/dream/test_package_import.py
new file mode 100644
index 00000000..7c10d02b
--- /dev/null
+++ b/tests/integration/scipp-analysis/dream/test_package_import.py
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2026 DMSC
+"""Tests for verifying package installation and version consistency.
+
+These tests check that easydiffraction and essdiffraction packages are
+installed and are not older than the latest PyPI release.
+"""
+
+import importlib.metadata
+
+import pytest
+import requests
+from packaging.version import Version
+
+PACKAGE_NAMES = ['easydiffraction', 'essdiffraction']
+PYPI_URL = 'https://pypi.org/pypi/{}/json'
+
+
+def get_installed_version(
+    package_name: str,
+) -> str | None:
+    """Get the installed version of a package."""
+    try:
+        return importlib.metadata.version(package_name)
+    except importlib.metadata.PackageNotFoundError:
+        return None
+
+
+def get_latest_version(
+    package_name: str,
+) -> str | None:
+    """Get the latest version of a package from PyPI."""
+    response = requests.get(PYPI_URL.format(package_name), timeout=10)
+    if response.status_code == 200:
+        return response.json()['info']['version']
+    return None
+
+
+def get_base_version(
+    version_str: str,
+) -> str:
+    """Extract MAJOR.MINOR.PATCH from version string, ignoring local
+    identifiers.
+    """
+    v = Version(version_str)
+    return v.base_version
+
+
+@pytest.mark.parametrize('package_name', PACKAGE_NAMES)
+def test_package_import(
+    package_name: str,
+) -> None:
+    """Verify installed package is not older than PyPI latest version.
+
+    Uses >= comparison to support both:
+    - Real releases where installed == latest
+    - Dev builds where installed (e.g., 999.0.0) > latest
+    """
+    installed_version = get_installed_version(package_name)
+    latest_version = get_latest_version(package_name)
+
+    assert installed_version is not None, f'Package {package_name} is not installed.'
+    assert latest_version is not None, f'Could not fetch latest version for {package_name}.'
+
+    # Compare only MAJOR.MINOR.PATCH, ignoring local version identifiers
+    installed_base = Version(get_base_version(installed_version))
+    latest_base = Version(get_base_version(latest_version))
+
+    assert installed_base >= latest_base, (
+        f'Package {package_name} is outdated: Installed={installed_base}, Latest={latest_base}'
+    )
diff --git a/tests/integration/scipp-analysis/dream/test_read_reduced_data.py b/tests/integration/scipp-analysis/dream/test_read_reduced_data.py
new file mode 100644
index 00000000..616c9876
--- /dev/null
+++ b/tests/integration/scipp-analysis/dream/test_read_reduced_data.py
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2026 DMSC
+"""Tests for reading reduced data from CIF files.
+
+These tests verify that the CIF file can be fetched, read as text,
+and parsed by gemmi without errors.
+"""
+
+import gemmi
+
+
+def test_read_reduced_data__fetch_cif(
+    cif_path: str,
+) -> None:
+    """Verify that the CIF file can be fetched from remote URL."""
+    assert cif_path is not None
+    assert len(cif_path) > 0
+
+
+def test_read_reduced_data__py_read_cif(
+    cif_content: str,
+) -> None:
+    """Verify CIF file can be read as text with CIF 1.1 header."""
+    assert len(cif_content) > 0
+    assert '#\\#CIF_1.1' in cif_content
+
+
+def test_read_reduced_data__gemmi_parse_cif(
+    cif_document: gemmi.cif.Document,
+) -> None:
+    """Verify that gemmi can parse the CIF document."""
+    assert cif_document is not None
+    assert len(cif_document) > 0
diff --git a/tests/integration/scipp-analysis/dream/test_scipp-cif.py b/tests/integration/scipp-analysis/dream/test_scipp-cif.py
deleted file mode 100644
index 15a49b6f..00000000
--- a/tests/integration/scipp-analysis/dream/test_scipp-cif.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# SPDX-FileCopyrightText: 2021-2026 EasyDiffraction contributors
-# SPDX-License-Identifier: BSD-3-Clause
-
-import numpy as np
-from numpy.testing import assert_array_equal
-from pooch import retrieve
-
-import easydiffraction as ed
-
-
-def test_read_tof_cif_from_scipp() -> None:
-    """
-    Test reading a CIF file from scipp
-    :return: None
-    """
-
-    # Retrieve the CIF file
-    file_path = retrieve(
-        url="https://pub-6c25ef91903d4301a3338bd53b370098.r2.dev/dream_reduced.cif",
-        known_hash=None,
-    )
-
-    # Add experiment type
-    expt_type = {
-        '_expt_type.sample_form': 'powder',
-        '_expt_type.beam_mode': 'time-of-flight',
-        '_expt_type.radiation_probe': 'neutron',
-        '_expt_type.scattering_type': 'bragg',
-    }
-    with open(file_path) as f:
-        content = f.read()
-    for key, value in expt_type.items():
-        if key not in content:
-            with open(file_path, "a") as f:
-                f.write(f"{key} {value}\n")
-
-    # Create project
-    proj = ed.Project()
-
-    # Add experiment from CIF file
-    proj.experiments.add(cif_path=file_path)
-
-    # Check the experiment names
-    assert proj.experiments.names == ['reduced_tof']
-
-    # Alias for easier access
-    experiment = proj.experiments['reduced_tof']
-
-    # Check data size
-    assert experiment.data.x.size == 200
-
-    # Check some x data points
-    assert_array_equal(
-        experiment.data.x[:4],
-        np.array([
-            57.526660478722604,
-            172.57998143616783,
-            287.633302393613,
-            402.68662335105824,
-        ])
-    )
-    assert_array_equal(
-        experiment.data.x[-2:],
-        np.array([
-            22838.084210052875,
-            22953.137531010318,
-        ])
-    )
-
-    # Check some measured y data points
-    #assert experiment.data.meas[93] == 2.0
-
-    # Check some uncertainty data points
-    #assert experiment.data.meas_su[93] == 1.4142135623730951
diff --git a/tests/integration/scipp-analysis/dream/test_validate_meta_data.py b/tests/integration/scipp-analysis/dream/test_validate_meta_data.py
new file mode 100644
index 00000000..7712dbc3
--- /dev/null
+++ b/tests/integration/scipp-analysis/dream/test_validate_meta_data.py
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2026 DMSC
+"""Tests for validating metadata structure in CIF files.
+
+These tests verify that the CIF file contains the expected data blocks,
+loops, and calibration parameters required for TOF diffraction analysis.
+"""
+
+import gemmi
+import pytest
+
+
+def test_validate_meta_data__block_exists(
+    cif_document: gemmi.cif.Document,
+) -> None:
+    """Verify that single datablock 'reduced_tof' is present."""
+    assert len(cif_document) == 1
+    assert cif_document.sole_block().name == 'reduced_tof'
+
+
+def test_validate_meta_data__diffrn_radiation(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify _diffrn_radiation.probe is 'neutron'."""
+    probe = cif_block.find_value('_diffrn_radiation.probe')
+    assert probe is not None
+    assert isinstance(probe, str)
+    assert probe == 'neutron'
+
+
+def test_validate_meta_data__d_to_tof_loop(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify the d_to_tof calibration loop exists with correct
+    structure.
+    """
+    loop = cif_block.find(['_pd_calib_d_to_tof.id']).loop
+    assert loop is not None
+
+    # Check loop has exactly 1 row
+    assert loop.length() == 1
+
+    # Check all expected columns exist
+    assert '_pd_calib_d_to_tof.id' in loop.tags
+    assert '_pd_calib_d_to_tof.power' in loop.tags
+    assert '_pd_calib_d_to_tof.coeff' in loop.tags
+
+
+def test_validate_meta_data__d_to_tof_difc(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify DIFC calibration coefficient is approximately 28385.3."""
+    table = cif_block.find([
+        '_pd_calib_d_to_tof.id',
+        '_pd_calib_d_to_tof.power',
+        '_pd_calib_d_to_tof.coeff',
+    ])
+    loop = table.loop
+    assert loop is not None
+
+    id_, power, coeff = loop.values
+    assert id_ == 'DIFC'
+    assert int(power) == 1
+    assert pytest.approx(float(coeff), rel=0.01) == 28385.3
+
+
+def test_validate_meta_data__data_loop_exists(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify the main data loop exists with required columns."""
+    loop = cif_block.find(['_pd_data.point_id']).loop
+    assert loop is not None
+
+    # Check all expected columns exist
+    assert '_pd_data.point_id' in loop.tags
+    assert '_pd_meas.time_of_flight' in loop.tags
+    assert '_pd_proc.intensity_norm' in loop.tags
+    assert '_pd_proc.intensity_norm_su' in loop.tags
diff --git a/tests/integration/scipp-analysis/dream/test_validate_physical_data.py b/tests/integration/scipp-analysis/dream/test_validate_physical_data.py
new file mode 100644
index 00000000..f1be5eb3
--- /dev/null
+++ b/tests/integration/scipp-analysis/dream/test_validate_physical_data.py
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2026 DMSC
+"""Tests for validating physical data values in CIF files.
+
+These tests verify that numerical data columns contain valid,
+physically meaningful values (positive TOF, non-negative intensity).
+"""
+
+import gemmi
+import numpy as np
+import pytest
+
+# Expected number of data points in the loop (from scipp reduction)
+LOOP_SIZE = 2000
+
+
+def get_column_values(
+    cif_block: gemmi.cif.Block,
+    tag: str,
+) -> np.ndarray:
+    """Extract column values from CIF block as numpy array."""
+    column = cif_block.find([tag])
+    return np.array([float(row[0]) for row in column])
+
+
+def test_validate_physical_data__data_size(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify the data loop contains exactly 2000 points."""
+    loop = cif_block.find(['_pd_data.point_id']).loop
+    assert loop.length() == LOOP_SIZE
+
+
+def test_validate_physical_data__point_id_type(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify _pd_data.point_id contains sequential integers."""
+    point_ids = get_column_values(cif_block, '_pd_data.point_id')
+
+    # Ensure all point IDs are integer-valued (no fractional part).
+    frac, _ = np.modf(point_ids)
+    assert np.all(frac == 0), '_pd_data.point_id values are expected to be integers'
+
+    assert len(point_ids) == LOOP_SIZE
+    assert point_ids[0] == 0
+    assert point_ids[-1] == LOOP_SIZE - 1
+    np.testing.assert_array_equal(point_ids, np.arange(LOOP_SIZE, dtype=point_ids.dtype))
+
+
+def test_validate_physical_data__tof_positive(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify _pd_meas.time_of_flight values are positive."""
+    tof_values = get_column_values(cif_block, '_pd_meas.time_of_flight')
+    assert np.all(tof_values > 0), 'TOF values must be positive'
+
+
+def test_validate_physical_data__tof_increasing(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify _pd_meas.time_of_flight values are strictly increasing."""
+    tof_values = get_column_values(cif_block, '_pd_meas.time_of_flight')
+    assert np.all(np.diff(tof_values) > 0), 'TOF values must be strictly increasing'
+
+
+def test_validate_physical_data__tof_range(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify TOF range spans approx. 8530 to 66504 microseconds."""
+    tof_values = get_column_values(cif_block, '_pd_meas.time_of_flight')
+    assert pytest.approx(tof_values[0], rel=0.01) == 8530.1
+    assert pytest.approx(tof_values[-1], rel=0.01) == 66503.7
+
+
+def test_validate_physical_data__intensity_values(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify _pd_proc.intensity_norm values are non-negative."""
+    intensity = get_column_values(cif_block, '_pd_proc.intensity_norm')
+
+    assert np.all(intensity >= 0), 'Intensity values must be non-negative'
+    assert intensity[0] == pytest.approx(0.0, abs=0.01)
+    assert intensity[-1] == pytest.approx(0.68, rel=0.1)
+
+
+def test_validate_physical_data__intensity_su_values(
+    cif_block: gemmi.cif.Block,
+) -> None:
+    """Verify _pd_proc.intensity_norm_su values are non-negative."""
+    intensity_su = get_column_values(cif_block, '_pd_proc.intensity_norm_su')
+
+    assert np.all(intensity_su >= 0), 'Intensity SU values must be non-negative'
+    assert intensity_su[0] == pytest.approx(0.0, abs=0.01)
+    assert intensity_su[-1] == pytest.approx(0.04, rel=0.1)