diff --git a/src/qcodes/dataset/data_set.py b/src/qcodes/dataset/data_set.py
index c58aef15ff7c..0bd0d101dc40 100644
--- a/src/qcodes/dataset/data_set.py
+++ b/src/qcodes/dataset/data_set.py
@@ -13,8 +13,9 @@
 from threading import Thread
 from typing import TYPE_CHECKING, Any, Literal
 
+import cf_xarray as cfxr
 import numpy
-from tqdm.auto import trange
+from tqdm.auto import tqdm
 
 import qcodes
 from qcodes.dataset.data_set_protocol import (
@@ -246,7 +247,8 @@ def __init__(
         #: In memory representation of the data in the dataset.
         self._cache: DataSetCacheWithDBBackend = DataSetCacheWithDBBackend(self)
         self._results: list[dict[str, VALUE]] = []
-        self._in_memory_cache = in_memory_cache
+        self._in_memory_cache: bool = in_memory_cache
+        self._max_num_files_export = 100
         self._export_limit = 1000
 
         if run_id is not None:
@@ -1484,6 +1486,21 @@ def _set_export_info(self, export_info: ExportInfo) -> None:
 
     def _export_as_netcdf(self, path: Path, file_name: str) -> Path:
         """Export data as netcdf to a given path with file prefix"""
+
+        def generate_steps(num_rows, max_num_steps) -> list[tuple[int, int]]:
+            if max_num_steps >= num_rows:
+                return [(i + 1, i + 1) for i in range(num_rows)]
+
+            step_size, remainder = divmod(num_rows, max_num_steps)
+            limits = [
+                (i * step_size + 1, (i + 1) * step_size) for i in range(max_num_steps)
+            ]
+
+            if remainder > 0:
+                limits[-1] = (limits[-1][0], (step_size) * max_num_steps + remainder)
+
+            return limits
+
         import xarray as xr
 
         file_path = path / file_name
@@ -1514,16 +1531,22 @@ def _export_as_netcdf(self, path: Path, file_name: str) -> Path:
                     "temp_dir": temp_dir,
                 },
             )
-            num_files = len(self)
+            num_rows = len(self)
+            steps = generate_steps(num_rows, self._max_num_files_export)
+            num_files = len(steps)
             num_digits = len(str(num_files))
             file_name_template = f"ds_{{:0{num_digits}d}}.nc"
-            for i in trange(num_files, desc="Writing individual files"):
+            for i, (start, stop) in tqdm(
+                enumerate(steps), total=num_files, desc="Writing individual files"
+            ):
                 xarray_to_h5netcdf_with_complex_numbers(
-                    self.to_xarray_dataset(start=i + 1, end=i + 1),
+                    self.to_xarray_dataset(start=start, end=stop),
                     temp_path / file_name_template.format(i),
                 )
             files = tuple(temp_path.glob("*.nc"))
-            data = xr.open_mfdataset(files)
+            data = xr.open_mfdataset(
+                files, preprocess=cfxr.coding.decode_compress_to_multi_index
+            )
             try:
                 log.info(
                     "Combining temp files into one file.",
diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py
index f9f00195aff0..d38c7cd86ad2 100644
--- a/tests/dataset/test_dataset_export.py
+++ b/tests/dataset/test_dataset_export.py
@@ -5,9 +5,11 @@
 import os
 from pathlib import Path
 
+import hypothesis.strategies as hst
 import numpy as np
 import pytest
 import xarray as xr
+from hypothesis import HealthCheck, given, settings
 from numpy.testing import assert_allclose
 from pytest import LogCaptureFixture, TempPathFactory
 
@@ -826,11 +828,17 @@ def test_export_dataset_small_no_delated(
     assert "Writing netcdf file directly" in caplog.records[0].msg
 
 
+@settings(
+    deadline=None,
+    suppress_health_check=(HealthCheck.function_scoped_fixture,),
+)
+@given(max_num_files=hst.integers(min_value=1, max_value=55))
 def test_export_dataset_delayed_numeric(
-    tmp_path_factory: TempPathFactory, mock_dataset_grid: DataSet, caplog
+    max_num_files, tmp_path_factory: TempPathFactory, mock_dataset_grid: DataSet, caplog
 ) -> None:
-    tmp_path = tmp_path_factory.mktemp("export_netcdf")
+    tmp_path = tmp_path_factory.mktemp(f"export_netcdf_{max_num_files}")
     mock_dataset_grid._export_limit = 0
+    mock_dataset_grid._max_num_files_export = max_num_files
 
     with caplog.at_level(logging.INFO):
         mock_dataset_grid.export(export_type="netcdf", path=tmp_path, prefix="qcodes_")
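
Notes on the patch (not part of the diff):

The new `generate_steps` helper partitions the 1-based row range into at most `max_num_steps` contiguous `(start, stop)` chunks, one temp file per chunk, folding any remainder rows into the last chunk; when the cap is not binding it falls back to the previous one-row-per-file behaviour. A standalone sketch of the same logic with illustrative calls:

```python
def generate_steps(num_rows: int, max_num_steps: int) -> list[tuple[int, int]]:
    # Cap not binding: keep the old behaviour of one row per file.
    if max_num_steps >= num_rows:
        return [(i + 1, i + 1) for i in range(num_rows)]

    # Split into max_num_steps equal chunks of step_size rows each,
    # then fold the remainder rows into the final chunk.
    step_size, remainder = divmod(num_rows, max_num_steps)
    limits = [(i * step_size + 1, (i + 1) * step_size) for i in range(max_num_steps)]
    if remainder > 0:
        limits[-1] = (limits[-1][0], step_size * max_num_steps + remainder)
    return limits

print(generate_steps(10, 3))   # [(1, 3), (4, 6), (7, 10)]
print(generate_steps(3, 100))  # [(1, 1), (2, 2), (3, 3)]
```

The bounds are 1-based and inclusive on both ends, matching `to_xarray_dataset(start=..., end=...)`.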
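The `preprocess=cfxr.coding.decode_compress_to_multi_index` argument is needed because netCDF cannot store a pandas `MultiIndex` directly: the per-chunk files are written with the index flattened via cf_xarray's "compression by gathering" encoding, so each file must be decoded back to a `MultiIndex` before `open_mfdataset` can concatenate the chunks. A minimal round-trip sketch of that encoding, using a toy two-setpoint dataset rather than qcodes run data:

```python
import cf_xarray as cfxr
import xarray as xr

# Toy dataset with a MultiIndex over a single "points" dimension,
# standing in for a qcodes dataset with two setpoint parameters.
ds = xr.Dataset(
    {"z": ("points", [0.0, 1.0, 2.0, 3.0])},
    coords={"x": ("points", [1, 1, 2, 2]), "y": ("points", ["a", "b", "a", "b"])},
).set_index(points=["x", "y"])

# encode_multi_index_as_compress produces the netCDF-safe flattened form;
# decode_compress_to_multi_index inverts it on read.
encoded = cfxr.coding.encode_multi_index_as_compress(ds)
decoded = cfxr.coding.decode_compress_to_multi_index(encoded)
assert decoded.indexes["points"].equals(ds.indexes["points"])
```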
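On the test side, Hypothesis now sweeps `max_num_files` from 1 to 55, which presumably straddles the fixture's row count so that both branches of `generate_steps` (capped chunks and one row per file) are exercised; the per-example `mktemp` suffix keeps the export directories of different Hypothesis examples from colliding. The chunking invariants the export relies on could also be property-tested directly against the `generate_steps` sketch above (test name and strategy bounds are mine, not from the patch):

```python
import hypothesis.strategies as hst
from hypothesis import given

@given(
    num_rows=hst.integers(min_value=1, max_value=500),
    max_num_steps=hst.integers(min_value=1, max_value=100),
)
def test_generate_steps_tiles_row_range(num_rows: int, max_num_steps: int) -> None:
    steps = generate_steps(num_rows, max_num_steps)  # sketch defined above
    assert len(steps) <= max_num_steps  # never more files than the cap
    assert steps[0][0] == 1             # starts at the first row
    assert steps[-1][1] == num_rows     # ends at the last row
    # Chunks are contiguous: each starts where the previous one stopped.
    for (_, prev_stop), (start, _) in zip(steps, steps[1:]):
        assert start == prev_stop + 1
```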