diff --git a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml index 998dbe9944..670eeedc6e 100644 --- a/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml @@ -4,24 +4,27 @@ attributes: dataset_id: CDS-SATELLITE-LAI-FAPAR project_id: OBS tier: 3 - version: 'V1' # Version as listed on source + version: 'V3' # Version as listed on source modeling_realm: sat source: 'https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar' reference: 'cds-satellite-lai-fapar' comment: | 'Leaf area index and fraction absorbed of photosynthetically active radiation 10-daily gridded data from 1998 to present' + start_year: 2000 + end_year: 2001 + # Variables to CMORize variables: lai: - mip: Lmon + mip: Eday #Lmon raw: LAI - file: 'c3s_LAI_*_GLOBE_VGT_V1.0.1.nc' - fapar: - mip: Lmon - raw: fAPAR - file: 'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc' + file: 'c3s_LAI_*_GLOBE_VGT_V3.0.1.nc' +# fapar: +# mip: Lmon +# raw: fAPAR +# file: 'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc' -# Parameters -custom: - regrid_resolution: '0.25x0.25' +Parameters: + custom: + regrid_resolution: 'None' diff --git a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py index e3e6ee3045..93e2213b84 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/cds_satellite_lai_fapar.py @@ -5,8 +5,9 @@ Source https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar?tab=form Last access - 20190703 + 20260120 +NEEDED TO UPDATE THIS for V3!!! Download and processing instructions - Open in a browser the data source as specified above - Put the right ticks: @@ -32,14 +33,17 @@ Modification history 20200512-crezee_bas: adapted to reflect changes in download form by CDS. 20190703-crezee_bas: written. + + 20260120: updates to support all three days of data per month """ import glob import logging import os -from copy import deepcopy from datetime import datetime -from warnings import catch_warnings, filterwarnings +import calendar +import numpy as np +import dask.array as da import cf_units import iris @@ -50,137 +54,45 @@ logger = logging.getLogger(__name__) +def load_callback(cube, field, filename): + """ + Callback fucntion for iris.load to remove all attributes from cube + so they will concatenate into a single cube + """ + cube.attributes = None -def _attrs_are_the_same(cubelist): - # assume they are the same - attrs_the_same = True - allattrs = cubelist[0].attributes - for key in allattrs: - try: - unique_attr_vals = {cube.attributes[key] for cube in cubelist} - # This exception is needed for valid_range, which is an - # array and therefore not hashable - except TypeError: - unique_attr_vals = { - tuple(cube.attributes[key]) for cube in cubelist - } - if len(unique_attr_vals) > 1: - attrs_the_same = False - print( - f"Different values found for {key}-attribute: " - f"{unique_attr_vals}" +def load_dataset(in_dir, var, cfg, year, month): + """ + Load the files from an individual month + """ + filelist = glob.glob(os.path.join(in_dir, var["file"])) + this_month_year_files = [] + for file in filelist: + if f"{year}{month:02d}" in file: + this_month_year_files.append(file) + + lai_cube = iris.load(this_month_year_files, + NameConstraint(var_name=var["raw"]), + callback=load_callback, ) - return attrs_the_same + + return lai_cube.concatenate_cube() - -def _cmorize_dataset(in_file, var, cfg, out_dir): - logger.info( - "CMORizing variable '%s' from input file '%s'", - var["short_name"], - in_file, - ) - attributes = deepcopy(cfg["attributes"]) - attributes["mip"] = var["mip"] +def _cmorize_dataset(cube, var, cfg): cmor_table = cfg["cmor_table"] definition = cmor_table.get_variable(var["mip"], var["short_name"]) - - cube = iris.load_cube( - str(in_file), constraint=NameConstraint(var_name=var["raw"]) - ) - - # Set correct names + cube.var_name = definition.short_name if definition.standard_name: cube.standard_name = definition.standard_name cube.long_name = definition.long_name - # Convert units if required + # units cube.convert_units(definition.units) - # Set global attributes - utils.set_global_atts(cube, attributes) - - logger.info("Saving CMORized cube for variable %s", cube.var_name) - utils.save_variable(cube, cube.var_name, out_dir, attributes) - - return in_file - - -def _regrid_dataset(in_dir, var, cfg): - """Regridding of original files. - - This function regrids each file and write to disk appending 'regrid' - in front of filename. - """ - filelist = glob.glob(os.path.join(in_dir, var["file"])) - for infile in filelist: - _, infile_tail = os.path.split(infile) - outfile_tail = infile_tail.replace("c3s", "c3s_regridded") - outfile = os.path.join(cfg["work_dir"], outfile_tail) - with catch_warnings(): - filterwarnings( - action="ignore", - # Full message: - # UserWarning: Skipping global attribute 'long_name': - # 'long_name' is not a permitted attribute - message="Skipping global attribute 'long_name'", - category=UserWarning, - module="iris", - ) - lai_cube = iris.load_cube( - infile, constraint=NameConstraint(var_name=var["raw"]) - ) - lai_cube = regrid( - lai_cube, cfg["custom"]["regrid_resolution"], "nearest" - ) - logger.info("Saving: %s", outfile) - - iris.save(lai_cube, outfile) - - -def _set_time_bnds(in_dir, var): - """Set time_bnds by using attribute and returns a cubelist.""" - # This is a complicated expression, but necessary to keep local - # variables below the limit, otherwise prospector complains. - cubelist = iris.load( - glob.glob( - os.path.join(in_dir, var["file"].replace("c3s", "c3s_regridded")) - ) - ) - - # The purpose of the following loop is to remove any attributes - # that differ between cubes (otherwise concatenation over time fails). - # In addition, care is taken of the time coordinate, by adding the - # time_coverage attributes as time_bnds to the time coordinate. - for n_cube, _ in enumerate(cubelist): - time_coverage_start = cubelist[n_cube].attributes.pop( - "time_coverage_start" - ) - time_coverage_end = cubelist[n_cube].attributes.pop( - "time_coverage_end" - ) - - # Now put time_coverage_start/end as time_bnds - # Convert time_coverage_xxxx to datetime - bnd_a = datetime.strptime(time_coverage_start, "%Y-%m-%dT%H:%M:%SZ") - bnd_b = datetime.strptime(time_coverage_end, "%Y-%m-%dT%H:%M:%SZ") - - # Put in shape for time_bnds - time_bnds_datetime = [bnd_a, bnd_b] - - # Read dataset time unit and calendar from file - dataset_time_unit = str(cubelist[n_cube].coord("time").units) - dataset_time_calender = cubelist[n_cube].coord("time").units.calendar - # Convert datetime - time_bnds = cf_units.date2num( - time_bnds_datetime, dataset_time_unit, dataset_time_calender - ) - # Put them on the file - cubelist[n_cube].coord("time").bounds = time_bnds - - return cubelist + return cube def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): @@ -194,47 +106,97 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): "Creating working directory for regridding: %s", cfg["work_dir"] ) os.mkdir(cfg["work_dir"]) - + for short_name, var in cfg["variables"].items(): var["short_name"] = short_name logger.info("Processing var %s", short_name) - # Regridding - logger.info( - "Start regridding to: %s", cfg["custom"]["regrid_resolution"] - ) - _regrid_dataset(in_dir, var, cfg) - logger.info("Finished regridding") - - # File concatenation - logger.info("Start setting time_bnds") - cubelist = _set_time_bnds(cfg["work_dir"], var) - - # Loop over two different platform names - for platformname in ["SPOT-4", "SPOT-5"]: - # Now split the cubelist on the different platform - logger.info("Start processing part of dataset: %s", platformname) - cubelist_platform = cubelist.extract( - iris.AttributeConstraint(platform=platformname) - ) - for n_cube, _ in enumerate(cubelist_platform): - cubelist_platform[n_cube].attributes.pop("identifier") - if cubelist_platform: - assert _attrs_are_the_same(cubelist_platform) - cube = cubelist_platform.concatenate_cube() - else: - logger.warning( - "No files found for platform %s \ - (check input data)", - platformname, - ) - continue - savename = os.path.join( - cfg["work_dir"], var["short_name"] + platformname + ".nc" - ) - logger.info("Saving as: %s", savename) - iris.save(cube, savename) - logger.info("Finished file concatenation over time") - logger.info("Start CMORization of file %s", savename) - _cmorize_dataset(savename, var, cfg, out_dir) - logger.info("Finished regridding and CMORizing %s", savename) + for year in range(cfg["attributes"]["start_year"], + cfg["attributes"]["end_year"]): + + + for month in range(1,13): + output = iris.cube.CubeList([]) + logger.info(f"Working with year {year}, month {month}") + + # Load orginal data in an indendent function + lai_cube = load_dataset(in_dir, var, cfg, year, month) + + # Regrdding + # uses nearest neighbour, skips if resolution = None + # This uses a huge amount of resource - be careful + resolution = cfg["Parameters"]["custom"]["regrid_resolution"] + if resolution == "None": + logger.info("No regridding") + else: + logger.info(f"Regridding {cfg["Parameters"]["custom"]["regrid_resolution"]}") + lai_cube = regrid( + lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest" + ) + + # make a daily version with Nan cubes for missing days + # This will work with 10-day CDS data and 5-day CCI data in updates at a later date + days_in_month = calendar.monthrange(year, month)[1] + time_coord = lai_cube.coord('time') + time_values = time_coord.points + dts = time_coord.units.num2date(time_values) + days = [item.day for item in dts] + + for day in range(1, days_in_month + 1): + if day in days: + logger.info(f"{day} is in CUBES") + new_cube = iris.util.new_axis(lai_cube[days.index(day)], 'time') + output.append(new_cube) + else: + logger.info(f"{day} NOT in CUBES") + nan_cube = create_dask_cube(lai_cube[0], year, month, day) + new_cube = iris.util.new_axis(nan_cube, 'time') + output.append(new_cube) + + output = output.concatenate_cube() + + # time bounds + # This sets time bounds without needing extra loops and checks + output.coord('time').guess_bounds() + + # cmorize + output = _cmorize_dataset(output, var, cfg) + + # save cube + logger.info(f"Saving CMORized cube for variable {output.var_name}") + # these should all be the same + attributes = cfg["attributes"] + attributes["mip"] = var["mip"] + utils.save_variable(output, lai_cube.var_name, out_dir, attributes, zlib=True) + + +def create_dask_cube(cube, year, month, day): + """Create a cube of NaNs for missing days. + + Args: + cube (int): Cube with target shape and coordinates to copy + year (int): Year for time point + month (int): Month for time point + day (int): Day for time point + + Returns: + cube: A 1xlatxlon cube of NaNs + """ + nan_da = da.full(cube.shape, np.nan, + chunks='auto', dtype=np.float32) + + new_cube = cube.copy() + new_cube.data = nan_da + + dataset_time_unit = str(new_cube.coord("time").units) + dataset_time_calender = new_cube.coord("time").units.calendar + + # Convert datetime + newtime = datetime(year=year, month=month, day=day) + newtime = cf_units.date2num( + newtime, dataset_time_unit, dataset_time_calender + ) + + new_cube.coord("time").points = np.float64(newtime) + + return new_cube \ No newline at end of file diff --git a/esmvaltool/recipes/examples/recipe_python.yml b/esmvaltool/recipes/examples/recipe_python.yml index d85e1ae437..d9aa7338f6 100644 --- a/esmvaltool/recipes/examples/recipe_python.yml +++ b/esmvaltool/recipes/examples/recipe_python.yml @@ -41,7 +41,7 @@ preprocessors: annual_mean_amsterdam: extract_location: - location: Amsterdam + location: London scheme: linear annual_statistics: operator: mean