From 37d3dea34e7803f321b4231138ef765949dafcec Mon Sep 17 00:00:00 2001 From: morobking Date: Mon, 3 Nov 2025 17:35:36 +0000 Subject: [PATCH 01/12] change V1 to V3 and comment out faPAR for now --- .../cmor_config/CDS-SATELLITE-LAI-FAPAR.yml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml index 998dbe9944..de365aa7f5 100644 --- a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml @@ -4,24 +4,27 @@ attributes: dataset_id: CDS-SATELLITE-LAI-FAPAR project_id: OBS tier: 3 - version: 'V1' # Version as listed on source + version: 'V3' # Version as listed on source modeling_realm: sat source: 'https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar' reference: 'cds-satellite-lai-fapar' comment: | 'Leaf area index and fraction absorbed of photosynthetically active radiation 10-daily gridded data from 1998 to present' + start_year: 2000 + end_year: 2001 + # Variables to CMORize variables: lai: mip: Lmon raw: LAI - file: 'c3s_LAI_*_GLOBE_VGT_V1.0.1.nc' - fapar: - mip: Lmon - raw: fAPAR - file: 'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc' + file: 'c3s_LAI_*_GLOBE_VGT_V3.0.1.nc' +# fapar: +# mip: Lmon +# raw: fAPAR +# file: 'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc' # Parameters -custom: - regrid_resolution: '0.25x0.25' +#custom: +# regrid_resolution: '0.25x0.25' From e205106f3c581aa5422c17b0708a6da1d8b99a3d Mon Sep 17 00:00:00 2001 From: morobking Date: Mon, 3 Nov 2025 18:30:40 +0000 Subject: [PATCH 02/12] file stripped back, and independent load function for year and month written --- .../datasets/cds_satellite_lai_fapar.py | 206 ++++-------------- 1 file changed, 45 insertions(+), 161 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py 
b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index e3e6ee3045..9d5740fd87 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -7,6 +7,7 @@ Last access 20190703 +NEEDED TO UPDATE THIS!!! Download and processing instructions - Open in a browser the data source as specified above - Put the right ticks: @@ -51,137 +52,33 @@ logger = logging.getLogger(__name__) -def _attrs_are_the_same(cubelist): - # assume they are the same - attrs_the_same = True - allattrs = cubelist[0].attributes - for key in allattrs: - try: - unique_attr_vals = {cube.attributes[key] for cube in cubelist} - # This exception is needed for valid_range, which is an - # array and therefore not hashable - except TypeError: - unique_attr_vals = { - tuple(cube.attributes[key]) for cube in cubelist - } - if len(unique_attr_vals) > 1: - attrs_the_same = False - print( - f"Different values found for {key}-attribute: " - f"{unique_attr_vals}" - ) - return attrs_the_same - - -def _cmorize_dataset(in_file, var, cfg, out_dir): - logger.info( - "CMORizing variable '%s' from input file '%s'", - var["short_name"], - in_file, - ) - attributes = deepcopy(cfg["attributes"]) - attributes["mip"] = var["mip"] - - cmor_table = cfg["cmor_table"] - definition = cmor_table.get_variable(var["mip"], var["short_name"]) - - cube = iris.load_cube( - str(in_file), constraint=NameConstraint(var_name=var["raw"]) - ) - - # Set correct names - cube.var_name = definition.short_name - if definition.standard_name: - cube.standard_name = definition.standard_name - - cube.long_name = definition.long_name - - # Convert units if required - cube.convert_units(definition.units) - - # Set global attributes - utils.set_global_atts(cube, attributes) - logger.info("Saving CMORized cube for variable %s", cube.var_name) - utils.save_variable(cube, cube.var_name, out_dir, attributes) - return in_file - - 
-def _regrid_dataset(in_dir, var, cfg): - """Regridding of original files. +def load_callback(cube, field, filename): + """ + Callback fucntion for iris.load to remove all attributes from cube + so they will concatenate into a single cube + """ + cube.attributes = None - This function regrids each file and write to disk appending 'regrid' - in front of filename. +def load_dataset(in_dir, var, cfg, year, month): + """ """ filelist = glob.glob(os.path.join(in_dir, var["file"])) - for infile in filelist: - _, infile_tail = os.path.split(infile) - outfile_tail = infile_tail.replace("c3s", "c3s_regridded") - outfile = os.path.join(cfg["work_dir"], outfile_tail) - with catch_warnings(): - filterwarnings( - action="ignore", - # Full message: - # UserWarning: Skipping global attribute 'long_name': - # 'long_name' is not a permitted attribute - message="Skipping global attribute 'long_name'", - category=UserWarning, - module="iris", - ) - lai_cube = iris.load_cube( - infile, constraint=NameConstraint(var_name=var["raw"]) + this_month_year_files = [] + for file in filelist: + if f"{year}{month:02d}" in file: + this_month_year_files.append(file) + + lai_cube = iris.load(this_month_year_files, + NameConstraint(var_name=var["raw"]), + callback=load_callback, ) - lai_cube = regrid( - lai_cube, cfg["custom"]["regrid_resolution"], "nearest" - ) - logger.info("Saving: %s", outfile) + + return lai_cube.concatenate_cube() - iris.save(lai_cube, outfile) -def _set_time_bnds(in_dir, var): - """Set time_bnds by using attribute and returns a cubelist.""" - # This is a complicated expression, but necessary to keep local - # variables below the limit, otherwise prospector complains. - cubelist = iris.load( - glob.glob( - os.path.join(in_dir, var["file"].replace("c3s", "c3s_regridded")) - ) - ) - - # The purpose of the following loop is to remove any attributes - # that differ between cubes (otherwise concatenation over time fails). 
- # In addition, care is taken of the time coordinate, by adding the - # time_coverage attributes as time_bnds to the time coordinate. - for n_cube, _ in enumerate(cubelist): - time_coverage_start = cubelist[n_cube].attributes.pop( - "time_coverage_start" - ) - time_coverage_end = cubelist[n_cube].attributes.pop( - "time_coverage_end" - ) - - # Now put time_coverage_start/end as time_bnds - # Convert time_coverage_xxxx to datetime - bnd_a = datetime.strptime(time_coverage_start, "%Y-%m-%dT%H:%M:%SZ") - bnd_b = datetime.strptime(time_coverage_end, "%Y-%m-%dT%H:%M:%SZ") - - # Put in shape for time_bnds - time_bnds_datetime = [bnd_a, bnd_b] - - # Read dataset time unit and calendar from file - dataset_time_unit = str(cubelist[n_cube].coord("time").units) - dataset_time_calender = cubelist[n_cube].coord("time").units.calendar - # Convert datetime - time_bnds = cf_units.date2num( - time_bnds_datetime, dataset_time_unit, dataset_time_calender - ) - # Put them on the file - cubelist[n_cube].coord("time").bounds = time_bnds - - return cubelist - def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" @@ -195,46 +92,33 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): ) os.mkdir(cfg["work_dir"]) + # add loops for month and year + # this is to liit amount of data in memory so the full 10 day + # resolution can be CMORized + for short_name, var in cfg["variables"].items(): var["short_name"] = short_name logger.info("Processing var %s", short_name) - # Regridding - logger.info( - "Start regridding to: %s", cfg["custom"]["regrid_resolution"] - ) - _regrid_dataset(in_dir, var, cfg) - logger.info("Finished regridding") - - # File concatenation - logger.info("Start setting time_bnds") - cubelist = _set_time_bnds(cfg["work_dir"], var) - - # Loop over two different platform names - for platformname in ["SPOT-4", "SPOT-5"]: - # Now split the cubelist on the different platform - logger.info("Start processing part of 
dataset: %s", platformname) - cubelist_platform = cubelist.extract( - iris.AttributeConstraint(platform=platformname) - ) - for n_cube, _ in enumerate(cubelist_platform): - cubelist_platform[n_cube].attributes.pop("identifier") - if cubelist_platform: - assert _attrs_are_the_same(cubelist_platform) - cube = cubelist_platform.concatenate_cube() - else: - logger.warning( - "No files found for platform %s \ - (check input data)", - platformname, - ) - continue - savename = os.path.join( - cfg["work_dir"], var["short_name"] + platformname + ".nc" - ) - logger.info("Saving as: %s", savename) - iris.save(cube, savename) - logger.info("Finished file concatenation over time") - logger.info("Start CMORization of file %s", savename) - _cmorize_dataset(savename, var, cfg, out_dir) - logger.info("Finished regridding and CMORizing %s", savename) + for year in range(cfg["attributes"]["start_year"], + cfg["attributes"]["end_year"]): + + # while testing: + if year>2000: continue + + for month in range(1,13): + + # while testing: + if month > 2: continue + + logger.info(f"Working with year {year}, month {month}") + + # Load orginal data in an indendent function + lai_cube = load_dataset(in_dir, var, cfg, year, month) + print(lai_cube) + + + # regrid + # time bounds + # cmorize + From bfaeba8978c857d5083ff88bd13cc2a440185b2d Mon Sep 17 00:00:00 2001 From: morobking Date: Tue, 4 Nov 2025 12:29:47 +0000 Subject: [PATCH 03/12] regridder working --- .../cmor_config/CDS-SATELLITE-LAI-FAPAR.yml | 6 +++--- .../datasets/cds_satellite_lai_fapar.py | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml index de365aa7f5..245a36baa5 100644 --- a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml @@ -25,6 +25,6 @@ variables: # raw: fAPAR # file: 
'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc' -# Parameters -#custom: -# regrid_resolution: '0.25x0.25' +Parameters: + custom: + regrid_resolution: '0.25x0.25' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index 9d5740fd87..a592175c76 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -95,7 +95,10 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): # add loops for month and year # this is to liit amount of data in memory so the full 10 day # resolution can be CMORized - + + logger.info(f"{cfg=}") + logger.info(f"{cfg['Parameters']['custom']['regrid_resolution']=}") + for short_name, var in cfg["variables"].items(): var["short_name"] = short_name logger.info("Processing var %s", short_name) @@ -117,8 +120,19 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): lai_cube = load_dataset(in_dir, var, cfg, year, month) print(lai_cube) + # Regrdding + # uses nearest neighbour, skips if resolution = None + resolution = cfg["Parameters"]["custom"]["regrid_resolution"] + if resolution == "None": + pass + else: + lai_cube = regrid( + lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest" + ) - # regrid + print(lai_cube) + + # time bounds # cmorize From 86071ae94a7473aaafe556adbb29853ed30ed6eb Mon Sep 17 00:00:00 2001 From: morobking Date: Tue, 4 Nov 2025 13:06:21 +0000 Subject: [PATCH 04/12] time bounds added with guess_bounds --- .../data/formatters/datasets/cds_satellite_lai_fapar.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index a592175c76..dd3a546256 100644 --- 
a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -77,9 +77,6 @@ def load_dataset(in_dir, var, cfg, year, month): return lai_cube.concatenate_cube() - - - def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" # run the cmorization @@ -130,7 +127,9 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest" ) - print(lai_cube) + # This sets time bounds without needing extra loops and checks + lai_cube.coord('time').guess_bounds() + print(lai_cube.coord('time')) # time bounds From d2945d1d0a37450cd4930a51b63566d93768f844 Mon Sep 17 00:00:00 2001 From: morobking Date: Tue, 4 Nov 2025 13:20:54 +0000 Subject: [PATCH 05/12] probably too many mistakes in this version, need to revert --- .../datasets/cds_satellite_lai_fapar.py | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index dd3a546256..c31e74384f 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -63,6 +63,7 @@ def load_callback(cube, field, filename): def load_dataset(in_dir, var, cfg, year, month): """ + Load the files from an individual month """ filelist = glob.glob(os.path.join(in_dir, var["file"])) this_month_year_files = [] @@ -77,7 +78,26 @@ def load_dataset(in_dir, var, cfg, year, month): return lai_cube.concatenate_cube() -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): +def _cmorize_dataset(cube, var, cfg): + + cmor_table = cfg["cmor_table"] + definition = cmor_table.get_variable(var["mip"], var["short_name"]) + + # standard name + # long name + 
cube.var_name = definition.short_name + if definition.standard_name: + cube.standard_name = definition.standard_name + + cube.long_name = definition.long_name + + # units + cube.convert_units(definition.units) + + return cube + + +def cmorization(cube, var): """Cmorization func call.""" # run the cmorization # Pass on the workdir to the cfg dictionary @@ -89,10 +109,6 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): ) os.mkdir(cfg["work_dir"]) - # add loops for month and year - # this is to liit amount of data in memory so the full 10 day - # resolution can be CMORized - logger.info(f"{cfg=}") logger.info(f"{cfg['Parameters']['custom']['regrid_resolution']=}") @@ -127,11 +143,15 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest" ) + # time bounds # This sets time bounds without needing extra loops and checks lai_cube.coord('time').guess_bounds() print(lai_cube.coord('time')) + # cmorize + lai_cube = _cmorize_dataset(lai_cube, var, cfg) + + print(lai_cube) + + # save cube - # time bounds - # cmorize - From 2bfa1f01778ccd3758ef6eea7396b8c517303404 Mon Sep 17 00:00:00 2001 From: morobking Date: Tue, 4 Nov 2025 15:47:23 +0000 Subject: [PATCH 06/12] fixed mistake --- .../data/formatters/datasets/cds_satellite_lai_fapar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index c31e74384f..ceabf188c8 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -97,7 +97,7 @@ def _cmorize_dataset(cube, var, cfg): return cube -def cmorization(cube, var): +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" # run the cmorization # Pass on the 
workdir to the cfg dictionary From 5dd44121e124ff566bb73d755362cdd0e8ab25fc Mon Sep 17 00:00:00 2001 From: morobking Date: Fri, 14 Nov 2025 12:15:53 +0000 Subject: [PATCH 07/12] final testing version, regridding runs out of memory even at 256 Gb --- .../datasets/cds_satellite_lai_fapar.py | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index ceabf188c8..85a4b096d2 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -51,9 +51,6 @@ logger = logging.getLogger(__name__) - - - def load_callback(cube, field, filename): """ Callback fucntion for iris.load to remove all attributes from cube @@ -68,12 +65,12 @@ def load_dataset(in_dir, var, cfg, year, month): filelist = glob.glob(os.path.join(in_dir, var["file"])) this_month_year_files = [] for file in filelist: - if f"{year}{month:02d}" in file: - this_month_year_files.append(file) + if f"{year}{month:02d}" in file: + this_month_year_files.append(file) lai_cube = iris.load(this_month_year_files, - NameConstraint(var_name=var["raw"]), - callback=load_callback, + NameConstraint(var_name=var["raw"]), + callback=load_callback, ) return lai_cube.concatenate_cube() @@ -117,7 +114,7 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): logger.info("Processing var %s", short_name) for year in range(cfg["attributes"]["start_year"], - cfg["attributes"]["end_year"]): + cfg["attributes"]["end_year"]): # while testing: if year>2000: continue @@ -137,21 +134,26 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): # uses nearest neighbour, skips if resolution = None resolution = cfg["Parameters"]["custom"]["regrid_resolution"] if resolution == "None": - pass + logger.info("No regridding") else: + 
logger.info(f"Regridding {cfg["Parameters"]["custom"]["regrid_resolution"]}") lai_cube = regrid( lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest" - ) - + ) + print(lai_cube) + iris.save(lai_cube, '/data/scratch/rob.king/text.nc') + print(0/0) # time bounds # This sets time bounds without needing extra loops and checks lai_cube.coord('time').guess_bounds() - print(lai_cube.coord('time')) # cmorize lai_cube = _cmorize_dataset(lai_cube, var, cfg) - print(lai_cube) - # save cube - + logger.info(f"Saving CMORized cube for variable {lai_cube.var_name}") + # these should all be the same + attributes = cfg["attributes"] + attributes["mip"] = var["mip"] + utils.save_variable(lai_cube, lai_cube.var_name, out_dir, attributes) + logger.info(f"SAVED") From 015675fbeb03ee769320b3dbe4640c67b596910e Mon Sep 17 00:00:00 2001 From: morobking Date: Fri, 14 Nov 2025 15:09:13 +0000 Subject: [PATCH 08/12] This version successfully made one year's worth of files --- .../data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml | 2 +- .../formatters/datasets/cds_satellite_lai_fapar.py | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml index 245a36baa5..89b3a2edbb 100644 --- a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml @@ -27,4 +27,4 @@ variables: Parameters: custom: - regrid_resolution: '0.25x0.25' + regrid_resolution: 'None' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index 85a4b096d2..59d149b14b 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -116,19 +116,12 @@ def cmorization(in_dir, 
out_dir, cfg, cfg_user, start_date, end_date): for year in range(cfg["attributes"]["start_year"], cfg["attributes"]["end_year"]): - # while testing: - if year>2000: continue - for month in range(1,13): - # while testing: - if month > 2: continue - logger.info(f"Working with year {year}, month {month}") # Load orginal data in an indendent function lai_cube = load_dataset(in_dir, var, cfg, year, month) - print(lai_cube) # Regrdding # uses nearest neighbour, skips if resolution = None @@ -140,9 +133,7 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): lai_cube = regrid( lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest" ) - print(lai_cube) - iris.save(lai_cube, '/data/scratch/rob.king/text.nc') - print(0/0) + # time bounds # This sets time bounds without needing extra loops and checks lai_cube.coord('time').guess_bounds() @@ -155,5 +146,4 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): # these should all be the same attributes = cfg["attributes"] attributes["mip"] = var["mip"] - utils.save_variable(lai_cube, lai_cube.var_name, out_dir, attributes) - logger.info(f"SAVED") + utils.save_variable(lai_cube, lai_cube.var_name, out_dir, attributes) \ No newline at end of file From b412483c07163b5e420db1ddf32c36bbf5f13225 Mon Sep 17 00:00:00 2001 From: morobking Date: Fri, 16 Jan 2026 18:11:33 +0000 Subject: [PATCH 09/12] version with loop to add 'nan' cubes on non-days. This has memory issues. 
see issue #4280 --- .../cmor_config/CDS-SATELLITE-LAI-FAPAR.yml | 2 +- .../datasets/cds_satellite_lai_fapar.py | 50 +++++++++++++++++-- esmvaltool/recipes/examples/recipe_python.yml | 2 +- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml index 89b3a2edbb..670eeedc6e 100644 --- a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml @@ -17,7 +17,7 @@ attributes: # Variables to CMORize variables: lai: - mip: Lmon + mip: Eday #Lmon raw: LAI file: 'c3s_LAI_*_GLOBE_VGT_V3.0.1.nc' # fapar: diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index 59d149b14b..e983203df0 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -41,6 +41,8 @@ from copy import deepcopy from datetime import datetime from warnings import catch_warnings, filterwarnings +import calendar +import numpy as np import cf_units import iris @@ -122,9 +124,10 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): # Load orginal data in an indendent function lai_cube = load_dataset(in_dir, var, cfg, year, month) - + # Regrdding # uses nearest neighbour, skips if resolution = None + # This uses a huge amount of resource - be careful resolution = cfg["Parameters"]["custom"]["regrid_resolution"] if resolution == "None": logger.info("No regridding") @@ -140,10 +143,51 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): # cmorize lai_cube = _cmorize_dataset(lai_cube, var, cfg) - + logger.info(f"********{lai_cube=}") + + # make a daily version with Nan cubes for missing days + # This will work with 10-day CDS data and 5-day 
CCI data in updates at a later date + days_in_month = calendar.monthrange(year, month)[1] + time_coord = lai_cube.coord('time') + time_values = time_coord.points + dts = time_coord.units.num2date(time_values) + days = [item.day for item in dts] + + output = iris.cube.CubeList([]) + for day in range(1, days_in_month + 1): + if day in days: + logger.info(f"{day} is in CUBES") + output.append(lai_cube[days.index(day)]) + else: + logger.info(f"{day} NOT in CUBES") + nan_cube = _create_nan_cube(lai_cube[0], year, month, day) + output.append(nan_cube) + + logger.info(f"{output=}") + print(0/0) # save cube logger.info(f"Saving CMORized cube for variable {lai_cube.var_name}") # these should all be the same attributes = cfg["attributes"] attributes["mip"] = var["mip"] - utils.save_variable(lai_cube, lai_cube.var_name, out_dir, attributes) \ No newline at end of file + utils.save_variable(lai_cube, lai_cube.var_name, out_dir, attributes) + +# from CCI SNOW CMORISER +def _create_nan_cube(cube, year, month, day): + """Create cube containing only nan from existing cube.""" + nan_cube = cube.copy() + nan_cube.data = np.full_like(nan_cube.data, np.nan, dtype=np.float32) + + # Read dataset time unit and calendar from file + dataset_time_unit = str(nan_cube.coord("time").units) + dataset_time_calender = nan_cube.coord("time").units.calendar + + # Convert datetime + newtime = datetime.datetime(year=year, month=month, day=day) + newtime = cf_units.date2num( + newtime, dataset_time_unit, dataset_time_calender + ) + + nan_cube.coord("time").points = np.float32(newtime) + + return nan_cube \ No newline at end of file diff --git a/esmvaltool/recipes/examples/recipe_python.yml b/esmvaltool/recipes/examples/recipe_python.yml index d85e1ae437..d9aa7338f6 100644 --- a/esmvaltool/recipes/examples/recipe_python.yml +++ b/esmvaltool/recipes/examples/recipe_python.yml @@ -41,7 +41,7 @@ preprocessors: annual_mean_amsterdam: extract_location: - location: Amsterdam + location: London scheme: linear 
annual_statistics: operator: mean From 75833b0f8a1e0992bcb9e9fa3bc3667ce8c07a2b Mon Sep 17 00:00:00 2001 From: morobking Date: Fri, 16 Jan 2026 19:25:30 +0000 Subject: [PATCH 10/12] Dask version of nan cube --- .../datasets/cds_satellite_lai_fapar.py | 34 ++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index e983203df0..0f866f46b7 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -43,6 +43,7 @@ from warnings import catch_warnings, filterwarnings import calendar import numpy as np +import dask.array as da import cf_units import iris @@ -160,17 +161,20 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): output.append(lai_cube[days.index(day)]) else: logger.info(f"{day} NOT in CUBES") - nan_cube = _create_nan_cube(lai_cube[0], year, month, day) + # nan_cube = _create_nan_cube(lai_cube[0], year, month, day) + nan_cube = create_dask_cube(lai_cube[0], year, month, day) output.append(nan_cube) + output = output.concatenate_cube() logger.info(f"{output=}") - print(0/0) + # save cube - logger.info(f"Saving CMORized cube for variable {lai_cube.var_name}") + logger.info(f"Saving CMORized cube for variable {output.var_name}") # these should all be the same attributes = cfg["attributes"] attributes["mip"] = var["mip"] - utils.save_variable(lai_cube, lai_cube.var_name, out_dir, attributes) + utils.save_variable(output, lai_cube.var_name, out_dir, attributes, zlib=True) + print(0/0) # from CCI SNOW CMORISER def _create_nan_cube(cube, year, month, day): @@ -190,4 +194,24 @@ def _create_nan_cube(cube, year, month, day): nan_cube.coord("time").points = np.float32(newtime) - return nan_cube \ No newline at end of file + return nan_cube + +def create_dask_cube(cube, 
year, month, day): + nan_da = da.full(cube.shape, np.nan, + chunks='auto', dtype=np.float32) + + new_cube = cube.copy() + new_cube.data = nan_da + + dataset_time_unit = str(new_cube.coord("time").units) + dataset_time_calender = new_cube.coord("time").units.calendar + + # Convert datetime + newtime = datetime(year=year, month=month, day=day) + newtime = cf_units.date2num( + newtime, dataset_time_unit, dataset_time_calender + ) + + new_cube.coord("time").points = np.float32(newtime) + + return new_cube \ No newline at end of file From 82f77a7573d655369ae4c76fc87b2eb413510e43 Mon Sep 17 00:00:00 2001 From: morobking Date: Mon, 19 Jan 2026 15:18:14 +0000 Subject: [PATCH 11/12] Dask for nan arrays, new axis for cubelist, cmor commands moved to later in process to work on one big cube --- .../datasets/cds_satellite_lai_fapar.py | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index 0f866f46b7..ed70299495 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -119,6 +119,7 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): for year in range(cfg["attributes"]["start_year"], cfg["attributes"]["end_year"]): + output = iris.cube.CubeList([]) for month in range(1,13): logger.info(f"Working with year {year}, month {month}") @@ -138,13 +139,7 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest" ) - # time bounds - # This sets time bounds without needing extra loops and checks - lai_cube.coord('time').guess_bounds() - - # cmorize - lai_cube = _cmorize_dataset(lai_cube, var, cfg) - logger.info(f"********{lai_cube=}") + # make a daily version with Nan cubes for missing days 
# This will work with 10-day CDS data and 5-day CCI data in updates at a later date @@ -154,27 +149,41 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): dts = time_coord.units.num2date(time_values) days = [item.day for item in dts] - output = iris.cube.CubeList([]) + # lai cube 0 is the problem, need the zeroth time step form the cuble of 3 time! ##################### for day in range(1, days_in_month + 1): if day in days: logger.info(f"{day} is in CUBES") - output.append(lai_cube[days.index(day)]) + #iris.util.new_axis(lai_cube, 'time') + new_cube = iris.util.new_axis(lai_cube[days.index(day)], 'time') + output.append(new_cube) else: logger.info(f"{day} NOT in CUBES") # nan_cube = _create_nan_cube(lai_cube[0], year, month, day) + logger.info(f"{lai_cube[0]=}") nan_cube = create_dask_cube(lai_cube[0], year, month, day) - output.append(nan_cube) + new_cube = iris.util.new_axis(nan_cube, 'time') + output.append(new_cube) + + logger.info(f"{output=}") output = output.concatenate_cube() logger.info(f"{output=}") + # time bounds + # This sets time bounds without needing extra loops and checks + output.coord('time').guess_bounds() + + # cmorize + output = _cmorize_dataset(output, var, cfg) + #logger.info(f"********{lai_cube=}") + #print(0/0) # save cube logger.info(f"Saving CMORized cube for variable {output.var_name}") # these should all be the same attributes = cfg["attributes"] attributes["mip"] = var["mip"] utils.save_variable(output, lai_cube.var_name, out_dir, attributes, zlib=True) - print(0/0) + #print(0/0) # from CCI SNOW CMORISER def _create_nan_cube(cube, year, month, day): @@ -192,7 +201,7 @@ def _create_nan_cube(cube, year, month, day): newtime, dataset_time_unit, dataset_time_calender ) - nan_cube.coord("time").points = np.float32(newtime) + nan_cube.coord("time").points = np.float64(newtime) return nan_cube @@ -212,6 +221,6 @@ def create_dask_cube(cube, year, month, day): newtime, dataset_time_unit, dataset_time_calender ) - 
new_cube.coord("time").points = np.float32(newtime) + new_cube.coord("time").points = np.float64(newtime) return new_cube \ No newline at end of file From d008db0a0c9c42322212eb22e020dc193d2f8d5c Mon Sep 17 00:00:00 2001 From: morobking Date: Tue, 20 Jan 2026 14:54:14 +0000 Subject: [PATCH 12/12] Code tidy --- .../datasets/cds_satellite_lai_fapar.py | 62 ++++++------------- 1 file changed, 19 insertions(+), 43 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index ed70299495..93e2213b84 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -5,9 +5,9 @@ Source https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar?tab=form Last access - 20190703 + 20260120 -NEEDED TO UPDATE THIS!!! +NEEDED TO UPDATE THIS for V3!!! Download and processing instructions - Open in a browser the data source as specified above - Put the right ticks: @@ -33,14 +33,14 @@ Modification history 20200512-crezee_bas: adapted to reflect changes in download form by CDS. 20190703-crezee_bas: written. 
+ + 20260120: updates to support all three days of data per month """ import glob import logging import os -from copy import deepcopy from datetime import datetime -from warnings import catch_warnings, filterwarnings import calendar import numpy as np import dask.array as da @@ -83,8 +83,6 @@ def _cmorize_dataset(cube, var, cfg): cmor_table = cfg["cmor_table"] definition = cmor_table.get_variable(var["mip"], var["short_name"]) - # standard name - # long name cube.var_name = definition.short_name if definition.standard_name: cube.standard_name = definition.standard_name @@ -108,10 +106,7 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): "Creating working directory for regridding: %s", cfg["work_dir"] ) os.mkdir(cfg["work_dir"]) - - logger.info(f"{cfg=}") - logger.info(f"{cfg['Parameters']['custom']['regrid_resolution']=}") - + for short_name, var in cfg["variables"].items(): var["short_name"] = short_name logger.info("Processing var %s", short_name) @@ -119,9 +114,9 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): for year in range(cfg["attributes"]["start_year"], cfg["attributes"]["end_year"]): - output = iris.cube.CubeList([]) - for month in range(1,13): - + + for month in range(1,13): + output = iris.cube.CubeList([]) logger.info(f"Working with year {year}, month {month}") # Load orginal data in an indendent function @@ -139,8 +134,6 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest" ) - - # make a daily version with Nan cubes for missing days # This will work with 10-day CDS data and 5-day CCI data in updates at a later date days_in_month = calendar.monthrange(year, month)[1] @@ -149,25 +142,18 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): dts = time_coord.units.num2date(time_values) days = [item.day for item in dts] - # lai cube 0 is the problem, need the zeroth time step form the cuble of 3 
time! ##################### for day in range(1, days_in_month + 1): if day in days: logger.info(f"{day} is in CUBES") - #iris.util.new_axis(lai_cube, 'time') new_cube = iris.util.new_axis(lai_cube[days.index(day)], 'time') output.append(new_cube) else: logger.info(f"{day} NOT in CUBES") - # nan_cube = _create_nan_cube(lai_cube[0], year, month, day) - logger.info(f"{lai_cube[0]=}") nan_cube = create_dask_cube(lai_cube[0], year, month, day) new_cube = iris.util.new_axis(nan_cube, 'time') output.append(new_cube) - logger.info(f"{output=}") - output = output.concatenate_cube() - logger.info(f"{output=}") # time bounds # This sets time bounds without needing extra loops and checks @@ -175,37 +161,27 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): # cmorize output = _cmorize_dataset(output, var, cfg) - #logger.info(f"********{lai_cube=}") - #print(0/0) + # save cube logger.info(f"Saving CMORized cube for variable {output.var_name}") # these should all be the same attributes = cfg["attributes"] attributes["mip"] = var["mip"] utils.save_variable(output, lai_cube.var_name, out_dir, attributes, zlib=True) - #print(0/0) - -# from CCI SNOW CMORISER -def _create_nan_cube(cube, year, month, day): - """Create cube containing only nan from existing cube.""" - nan_cube = cube.copy() - nan_cube.data = np.full_like(nan_cube.data, np.nan, dtype=np.float32) - # Read dataset time unit and calendar from file - dataset_time_unit = str(nan_cube.coord("time").units) - dataset_time_calender = nan_cube.coord("time").units.calendar - # Convert datetime - newtime = datetime.datetime(year=year, month=month, day=day) - newtime = cf_units.date2num( - newtime, dataset_time_unit, dataset_time_calender - ) - - nan_cube.coord("time").points = np.float64(newtime) +def create_dask_cube(cube, year, month, day): + """Create a cube of NaNs for missing days. 
- return nan_cube + Args: + cube (iris.cube.Cube): Cube with target shape and coordinates to copy + year (int): Year for time point + month (int): Month for time point + day (int): Day for time point -def create_dask_cube(cube, year, month, day): + Returns: + cube: A 1xlatxlon cube of NaNs + """ nan_da = da.full(cube.shape, np.nan, chunks='auto', dtype=np.float32)