From 9e92fbcff1b8eecdb159d611efe5aa87bc503da5 Mon Sep 17 00:00:00 2001 From: liellnima Date: Tue, 22 Oct 2024 15:39:42 +0200 Subject: [PATCH 01/38] remove model source center and reformat --- climateset/download/constants/esgf_server.py | 369 +++++++++++++++++-- climateset/download/downloader.py | 36 +- 2 files changed, 339 insertions(+), 66 deletions(-) diff --git a/climateset/download/constants/esgf_server.py b/climateset/download/constants/esgf_server.py index cf5bd17..a8d09ef 100644 --- a/climateset/download/constants/esgf_server.py +++ b/climateset/download/constants/esgf_server.py @@ -3,143 +3,113 @@ MODEL_SOURCES = { "ACCESS-CM2": { "node_link": NODE_LINK, - "center": "CSIRO-ARCCSS", }, "ACCESS-ESM1-5": { "node_link": NODE_LINK, - "center": "CSIRO", }, "AWI-CM-1-1-MR": { "node_link": NODE_LINK, - "center": "AWI", }, "BCC-CSM2-MR": { "node_link": NODE_LINK, - "center": "BCC", }, "CAMS-CSM1-0": { "node_link": NODE_LINK, - "center": "CAMS", }, "CAS-ESM2-0": { "node_link": NODE_LINK, - "center": "CAS", }, - "CESM2": {"node_link": NODE_LINK, "center": "NCAR"}, + "CESM2": { + "node_link": NODE_LINK, + }, "CESM2-WACCM": { "node_link": NODE_LINK, - "center": "NCAR", }, "CMCC-CM2-SR5": { "node_link": NODE_LINK, - "center": "NCAR", }, "CMCC-ESM2": { "node_link": NODE_LINK, - "center": "CMCC", }, "CNRM-CM6-1": { "node_link": NODE_LINK, - "center": "CNRM-CERFACS", }, "CNRM-CM6-1-HR": { "node_link": NODE_LINK, - "center": "CNRM-CERFACS", }, "CNRM-ESM2-1": { "node_link": NODE_LINK, - "center": "CNRM-CERFACS", }, "EC-Earth3": { "node_link": NODE_LINK, - "center": "EC-Earth-Consortium", }, "EC-Earth3-Veg": { "node_link": NODE_LINK, - "center": "EC-Earth-Consortium", }, "EC-Earth3-Veg-LR": { "node_link": NODE_LINK, - "center": "EC-Earth-Consortium", }, "FGOALS-f3-L": { "node_link": NODE_LINK, - "center": "CAS", }, "FGOALS-g3": { "node_link": NODE_LINK, - "center": "CAS", }, "GFDL-ESM4": { "node_link": NODE_LINK, - "center": "NOAA-GFDL", }, "GISS-E2-1-G": { "node_link": 
NODE_LINK, - "center": "NASA-GISS", }, "GISS-E2-1-H": { "node_link": NODE_LINK, - "center": "NASA-GISS", }, "GISS-E2-2-G": { "node_link": NODE_LINK, - "center": "NASA-GISS", }, "IITM-ESM": { "node_link": NODE_LINK, - "center": "CCCR-IITM", }, "INM-CM4-8": { "node_link": NODE_LINK, - "center": "INM", }, "INM-CM5-0": { "node_link": NODE_LINK, - "center": "INM", }, "IPSL-CM6A-LR": { "node_link": NODE_LINK, - "center": "IPSL", }, "KACE-1-0-G": { "node_link": NODE_LINK, - "center": "NIMS-KMA ", }, "MCM-UA-1-0": { "node_link": NODE_LINK, - "center": "UA", }, - "MIROC6": {"node_link": NODE_LINK, "center": "MIROC"}, + "MIROC6": { + "node_link": NODE_LINK, + }, # there are several centers for the MPI models - consider choosing another one if needed "MPI-ESM1-2-HR": { "node_link": NODE_LINK, - "center": "MPI-M", }, "MPI-ESM1-2-LR": { "node_link": NODE_LINK, - "center": "MPI-M", }, "MRI-ESM2-0": { "node_link": NODE_LINK, - "center": "MRI", }, "NorESM2-LM": { "node_link": "https://esgf-data.dkrz.de/esg-search", - "center": "NCC", }, "NorESM2-MM": { "node_link": "https://esgf-data.dkrz.de/esg-search", - "center": "NCC", }, "TaiESM1": { "node_link": NODE_LINK, - "center": "AS-RCEC", }, # there are several centers for the UKESM models - consider choosing another one if needed "UKESM1-0-LL": { "node_link": NODE_LINK, - "center": "MOHC", }, # "NorESM2-LM": {"node_link": "https://esgf-data.dkrz.de/esg-search", "center": "NCC"}, # "CanESM5" : {"node_link": NODE_LINK, "center": "CCCma"} @@ -1346,8 +1316,333 @@ "hist-GHG", "amip", ] + +SUPPORTED_EXPERIMENTS = [ + "hist-1950HC", + "lfmip-pdLC", + "ssp126", + "ssp126-ssp370Lu", + "ssp245", + "ssp370", + "ssp370-lowNTCF", + "ssp370-ssp126Lu", + "ssp370SST", + "ssp370SST-lowCH4", + "ssp370SST-lowNTCF", + "ssp370SST-ssp126Lu", + "ssp585", + "hist-resAMO", + "hist-resIPO", + "historical-ext", + "lfmip-initLC", + "lfmip-pdLC-cruNcep", + "lfmip-pdLC-princeton", + "lfmip-pdLC-wfdei", + "lfmip-rmLC", + "lfmip-rmLC-cruNcep", + 
"lfmip-rmLC-princeton", + "lfmip-rmLC-wfdei", + "pa-futAntSIC", + "pa-futArcSIC", + "pa-pdSIC", + "pa-piAntSIC", + "pa-piArcSIC", + "ssp119", + "ssp370pdSST", + "ssp370SST-lowAer", + "ssp370SST-lowBC", + "ssp370SST-lowO3", + "ssp434", + "ssp460", + "dcppC-atl-pacemaker", + "dcppC-pac-pacemaker", + "pa-futAntSIC-ext", + "pa-futArcSIC-ext", + "pa-pdSIC-ext", + "ssp370-lowNTCFCH4", + "ssp370SST-lowNTCFCH4", + "volc-cluster-21C", + "yr2010CO2", + "dcppA-historical-niff", + "1pctCO2", + "1pctCO2-bgc", + "abrupt-4xCO2", + "dcppC-amv-neg", + "dcppC-amv-pos", + "dcppC-atl-control", + "dcppC-ipv-neg", + "dcppC-ipv-pos", + "dcppC-pac-control", + "deforest-globe", + "faf-heat", + "faf-heat-NA50pct", + "faf-stress", + "faf-water", + "G1", + "hist-aer", + "hist-GHG", + "hist-nat", + "hist-noLu", + "hist-piNTCF", + "hist-spAer-all", + "histSST", + "histSST-noLu", + "histSST-piCH4", + "histSST-piNTCF", + "piClim-4xCO2", + "piClim-aer", + "piClim-anthro", + "piClim-CH4", + "piClim-control", + "piClim-ghg", + "piClim-HC", + "piClim-lu", + "piClim-NTCF", + "volc-long-eq", + "volc-pinatubo-full", + "volc-pinatubo-strat", + "volc-pinatubo-surf", + "1pctCO2-rad", + "1pctCO2Ndep", + "1pctCO2Ndep-bgc", + "abrupt-0p5xCO2", + "abrupt-2xCO2", + "abrupt-solm4p", + "abrupt-solp4p", + "dcppC-amv-ExTrop-neg", + "dcppC-amv-ExTrop-pos", + "dcppC-amv-Trop-neg", + "dcppC-amv-Trop-pos", + "dcppC-ipv-NexTrop-neg", + "dcppC-ipv-NexTrop-pos", + "faf-all", + "faf-antwater-stress", + "faf-heat-NA0pct", + "faf-passiveheat", + "hist-bgc", + "hist-piAer", + "hist-spAer-aer", + "hist-stratO3", + "histSST-piAer", + "histSST-piN2O", + "histSST-piO3", + "piClim-2xdust", + "piClim-2xss", + "piClim-BC", + "piClim-histaer", + "piClim-histall", + "piClim-histghg", + "piClim-histnat", + "piClim-N2O", + "piClim-O3", + "piClim-spAer-aer", + "piClim-spAer-anthro", + "piClim-spAer-histaer", + "piClim-spAer-histall", + "piSST-4xCO2-solar", + "volc-cluster-ctrl", + "volc-long-hlN", + "hist-all-aer2", + "hist-all-nat2", + 
"hist-CO2", + "hist-sol", + "hist-totalO3", + "hist-volc", + "piClim-2xDMS", + "piClim-2xfire", + "piClim-2xNOx", + "piClim-2xVOC", + "piClim-NH3", + "piClim-NOx", + "piClim-OC", + "piClim-SO2", + "piClim-VOC", + "volc-long-hlS", + "histSST-1950HC", + "esm-ssp585", + "esm-ssp585-ssp126Lu", + "esm-hist-ext", + "ssp534-over-bgc", + "ssp585-bgc", + "esm-1pct-brch-1000PgC", + "esm-1pct-brch-750PgC", + "esm-1pct-brch-2000PgC", + "esm-hist", + "esm-pi-cdr-pulse", + "esm-pi-CO2pulse", + "esm-1pctCO2", + "esm-bell-750PgC", + "esm-bell-1000PgC", + "esm-bell-2000PgC", + "esm-yr2010CO2-control", + "1pctCO2-4xext", + "1pctCO2-cdr", + "esm-ssp534-over", + "esm-ssp585-ocn-alk", + "esm-ssp585ext", + "esm-ssp585-ocn-alk-stop", + "esm-ssp585-ssp126Lu-ext", + "esm-yr2010CO2-cdr-pulse", + "esm-yr2010CO2-CO2pulse", + "esm-yr2010CO2-noemit", + "amip", + "amip-4xCO2", + "amip-future4K", + "amip-hist", + "amip-p4K", + "aqua-4xCO2", + "aqua-control", + "aqua-p4K", + "highresSST-present", + "ism-ctrl-std", + "ism-pdControl-std", + "ism-piControl-self", + "land-hist", + "land-hist-altStartYear", + "land-noLu", + "land-ssp126", + "land-ssp585", + "lgm", + "lig127k", + "midHolocene", + "midPliocene-eoi400", + "omip1", + "past1000", + "piControl-withism", + "rad-irf", + "a4SST", + "a4SSTice", + "a4SSTice-4xCO2", + "amip-a4SST-4xCO2", + "amip-lfmip-pdLC", + "amip-lfmip-pObs", + "amip-lfmip-rmLC", + "amip-lwoff", + "amip-m4K", + "amip-p4K-lwoff", + "amip-piForcing", + "aqua-control-lwoff", + "aqua-p4K-lwoff", + "dcppA-assim", + "esm-piControl-spinup", + "land-cClim", + "land-cCO2", + "land-crop-grass", + "land-crop-noFert", + "land-crop-noIrrig", + "land-crop-noIrrigFert", + "land-hist-altLu1", + "land-hist-altLu2", + "land-hist-cruNcep", + "land-hist-princeton", + "land-hist-wfdei", + "land-noFire", + "land-noPasture", + "land-noShiftCultivate", + "land-noWoodHarv", + "land-ssp434", + "omip1-spunup", + "past1000-solaronly", + "past1000-volconly", + "piControl-spinup", + 
"piControl-spinup-cmip5", + "piSST", + "piSST-4xCO2", + "piSST-4xCO2-rad", + "piSST-pxK", + "spinup-1950", + "amip-hld", + "amip-TIP", + "amip-TIP-nosh", + "control-slab", + "dcppC-atl-spg", + "esm-past1000", + "ism-lig127k-std", + "omip2", + "omip2-spunup", + "past2k", + "esm-piControl", + "historical", + "historical-cmip5", + "hist-aer-cmip5", + "hist-GHG-cmip5", + "hist-nat-cmip5", + "piControl", + "piControl-cmip5", + "ssp245-aer", + "ssp245-cov-strgreen", + "ssp245-covid", + "ssp245-cov-aer", + "ssp245-cov-fossil", + "ssp245-cov-GHG", + "ssp245-cov-modgreen", + "ssp245-GHG", + "ssp245-nat", + "ssp245-stratO3", + "dcppA-hindcast", + "dcppB-forecast", + "dcppC-forecast-addPinatubo", + "dcppC-hindcast-noPinatubo", + "dcppC-hindcast-noAgung", + "dcppC-hindcast-noElChichon", + "dcppC-forecast-addAgung", + "dcppC-forecast-addElChichon", + "dcppA-hindcast-niff", + "futureSST-4xCO2-solar", + "G6solar", + "G6sulfur", + "G6SST1", + "G7cirrus", + "G7SST1-cirrus", + "ssp534-over", + "G6SST2-solar", + "G6SST2-sulfur", + "G7SST2-cirrus", + "control-1950", + "hist-1950", + "highres-future", + "highresSST-4xCO2", + "highresSST-future", + "highresSST-LAI", + "highresSST-p4K", + "highresSST-smoothed", + "1pctCO2to4x-withism", + "historical-withism", + "ism-1pctCO2to4x-self", + "ism-historical-self", + "ism-1pctCO2to4x-std", + "ism-historical-std", + "ism-asmb-std", + "ism-bsmb-std", + "ism-amip-std", + "ism-ssp585-self", + "ism-ssp585-std", + "ssp585-withism", + "pdSST-futAntSIC", + "pdSST-futArcSIC", + "pdSST-pdSIC", + "pdSST-piAntSIC", + "pdSST-piArcSIC", + "piSST-pdSIC", + "futSST-pdSIC", + "piSST-piSIC", + "amip-climSIC", + "amip-climSST", + "modelSST-futArcSIC", + "modelSST-pdSIC", + "pdSST-futArcSICSIT", + "pdSST-futBKSeasSIC", + "pdSST-futOkhotskSIC", + "pdSST-pdSICSIT", + "rcp26-cmip5", + "rcp45-cmip5", + "rcp60-cmip5", + "rcp85-cmip5", + "volc-cluster-mill", + "volc-pinatubo-slab", +] # filepath to var to res Mapping -VAR_RES_MAPPING_PATH = 
"/home/charlie/Documents/MILA/causalpaca/data/data_description/mappings/variableid2tableid.csv" +# VAR_RES_MAPPING_PATH = "/home/charlie/Documents/MILA/causalpaca/data/data_description/mappings/variableid2tableid.csv" GRIDDING_HIERACHY = ["gn"] diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index 8ddd16f..e6220f2 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -2,7 +2,6 @@ import pathlib from typing import Union -import pandas as pd from pyesgf.search import SearchConnection from climateset import RAW_DATA @@ -21,8 +20,6 @@ download_metadata_variable, download_model_variable, download_raw_input_variable, - get_max_ensemble_member_number, - get_select_model_scenarios, get_upload_version, ) from climateset.utils import create_logger, get_keys_from_value, get_yaml_config @@ -72,7 +69,6 @@ def __init__( # noqa: C901 self.logger = logger self.model: str = model self.model_node_link: str = "" - self.model_source_center: str = "" if experiments is None: experiments = [ "historical", @@ -98,33 +94,14 @@ def __init__( # noqa: C901 self.download_biomass_burning: bool = download_biomassburning self.use_plain_emission_vars: bool = use_plain_emission_vars + # if max ensemble member number is too large --> we are relying on the server to complain? 
+ # Args processing - selected_scenarios = get_select_model_scenarios() - self._hande_max_possible_member_number( - df_model_source=selected_scenarios, max_ensemble_members=max_ensemble_members - ) self._handle_variables( variables=variables, ) self._handle_model_params() - # - # Internal helper functions for class init - # - def _hande_max_possible_member_number(self, df_model_source: pd.DataFrame, max_ensemble_members: int): - max_possible_member_number = get_max_ensemble_member_number( - df_model_source=df_model_source, experiments=self.experiments, model=self.model - ) - if max_ensemble_members == -1: - self.logger.info("Trying to take all ensemble members available.") - self.max_ensemble_members = max_possible_member_number - # verify that we have enough members for wanted experiments - # else choose the smallest available for all - if max_ensemble_members > max_possible_member_number: - self.logger.info("Not enough members available. Choosing smallest maximum.") - self.max_ensemble_members = max_possible_member_number - self.logger.info(f"Downloading data for {self.max_ensemble_members} members.") - def _handle_variables(self, variables: list[str]): self._generate_variables(variables=variables) self._generate_plain_emission_vars() @@ -138,14 +115,13 @@ def _handle_variables(self, variables: list[str]): def _handle_model_params(self): try: self.model_node_link = MODEL_SOURCES[self.model]["node_link"] - self.model_source_center = MODEL_SOURCES[self.model]["center"] except KeyError: - self.model = next(iter(MODEL_SOURCES)) if self.model is not None: - self.logger.info(f"WARNING: Model {self.model} unknown. 
Using default instead.") + self.logger.info(f"WARNING: Model {self.model} unknown.") + # TODO cause an error here and exit (move to next download item) + self.model = next(iter(MODEL_SOURCES)) self.logger.info(f"Using : {self.model}") self.model_node_link = MODEL_SOURCES[self.model]["node_link"] - self.model_source_center = MODEL_SOURCES[self.model]["center"] def _generate_plain_emission_vars(self): if self.use_plain_emission_vars: @@ -251,6 +227,8 @@ def download_from_model_single_var( # noqa: C901 ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + # CONTINUE DEBUGGING HERE + variants = list(ctx.facet_counts["variant_label"]) self.logger.info(f"Available variants : {variants}\n") From 7e085752b2e39d5a1331f7354e8a326bad5f66b7 Mon Sep 17 00:00:00 2001 From: liellnima Date: Sat, 26 Oct 2024 02:10:58 +0200 Subject: [PATCH 02/38] move selected scenario mip files to docs --- .../download/constants => docs}/selected_scenariosMIPs.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {climateset/download/constants => docs}/selected_scenariosMIPs.json (100%) diff --git a/climateset/download/constants/selected_scenariosMIPs.json b/docs/selected_scenariosMIPs.json similarity index 100% rename from climateset/download/constants/selected_scenariosMIPs.json rename to docs/selected_scenariosMIPs.json From 68dabf590eaa9c8fda311f71b4b6569073fdc964 Mon Sep 17 00:00:00 2001 From: liellnima Date: Sat, 26 Oct 2024 02:11:59 +0200 Subject: [PATCH 03/38] update download configs for project, and ensemble members --- .../downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml | 1 + configs/downloader/cmip6/canesm_co2_ssp.yaml | 3 ++- configs/downloader/cmip6/fgoals_tas_ssp.yaml | 1 + configs/downloader/cmip6/noresm_pr_historical.yaml | 4 +++- configs/downloader/cmip6/noresm_tas_ssp.yaml | 4 +++- configs/downloader/cmip6/ukesm_tas_picontrol.yaml | 1 + configs/downloader/future_usecases/cmip7.yaml | 1 + 
configs/downloader/input4mips/bc_historical.yaml | 1 + configs/downloader/input4mips/bc_ssp.yaml | 1 + configs/downloader/input4mips/ch4_historical.yaml | 1 + configs/downloader/input4mips/ch4_ssp.yaml | 1 + configs/downloader/input4mips/co2_historical.yaml | 1 + configs/downloader/input4mips/co2_ssp.yaml | 1 + configs/downloader/input4mips/so2_historical.yaml | 1 + configs/downloader/input4mips/so2_ssp.yaml | 1 + 15 files changed, 20 insertions(+), 3 deletions(-) diff --git a/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml b/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml index db0c390..2c95dc4 100644 --- a/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml +++ b/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml @@ -1,3 +1,4 @@ +project: "CMIP6" models: ["AWI-CM-1-1-HR"] downloader_kwargs: variables: ["sithick"] diff --git a/configs/downloader/cmip6/canesm_co2_ssp.yaml b/configs/downloader/cmip6/canesm_co2_ssp.yaml index fc373d0..affd101 100644 --- a/configs/downloader/cmip6/canesm_co2_ssp.yaml +++ b/configs/downloader/cmip6/canesm_co2_ssp.yaml @@ -1,4 +1,5 @@ +project: "CMIP6" models: ["CanESM5"] downloader_kwargs: variables: ["co2"] - experiments: ["ssp245"] \ No newline at end of file + experiments: ["abrupt-2xCO2"] \ No newline at end of file diff --git a/configs/downloader/cmip6/fgoals_tas_ssp.yaml b/configs/downloader/cmip6/fgoals_tas_ssp.yaml index cfd3eb1..5cbfc5d 100644 --- a/configs/downloader/cmip6/fgoals_tas_ssp.yaml +++ b/configs/downloader/cmip6/fgoals_tas_ssp.yaml @@ -1,3 +1,4 @@ +project: "CMIP6" models: ["FGOALS-g3"] downloader_kwargs: variables: ["tas"] diff --git a/configs/downloader/cmip6/noresm_pr_historical.yaml b/configs/downloader/cmip6/noresm_pr_historical.yaml index 0498820..39b940d 100644 --- a/configs/downloader/cmip6/noresm_pr_historical.yaml +++ b/configs/downloader/cmip6/noresm_pr_historical.yaml @@ -1,4 +1,6 @@ +project: "CMIP6" models: ["NorESM2-LM"] downloader_kwargs: 
variables: ["pr"] - experiments: ["historical"] \ No newline at end of file + experiments: ["historical"] + max_ensemble_members: 1 \ No newline at end of file diff --git a/configs/downloader/cmip6/noresm_tas_ssp.yaml b/configs/downloader/cmip6/noresm_tas_ssp.yaml index e1e7238..dde1d45 100644 --- a/configs/downloader/cmip6/noresm_tas_ssp.yaml +++ b/configs/downloader/cmip6/noresm_tas_ssp.yaml @@ -1,4 +1,6 @@ +project: "CMIP6" models: ["NorESM2-LM"] downloader_kwargs: variables: ["tas"] - experiments: ["ssp245"] \ No newline at end of file + experiments: ["ssp245"] + ensemble_members: ["r9i1p1f2", "r8i1p1f2"] \ No newline at end of file diff --git a/configs/downloader/cmip6/ukesm_tas_picontrol.yaml b/configs/downloader/cmip6/ukesm_tas_picontrol.yaml index 2b2a25d..dc94d3d 100644 --- a/configs/downloader/cmip6/ukesm_tas_picontrol.yaml +++ b/configs/downloader/cmip6/ukesm_tas_picontrol.yaml @@ -1,3 +1,4 @@ +project: "CMIP6" models: ["UKESM1-0-LL"] downloader_kwargs: variables: ["tas"] diff --git a/configs/downloader/future_usecases/cmip7.yaml b/configs/downloader/future_usecases/cmip7.yaml index 1a31530..69460ed 100644 --- a/configs/downloader/future_usecases/cmip7.yaml +++ b/configs/downloader/future_usecases/cmip7.yaml @@ -1,3 +1,4 @@ +project: "CMIP6Plus" models: ["HasGEM3-GC31-LL"] downloader_kwargs: variables: ["mrsofc"] diff --git a/configs/downloader/input4mips/bc_historical.yaml b/configs/downloader/input4mips/bc_historical.yaml index 717e0c2..1ad3cec 100644 --- a/configs/downloader/input4mips/bc_historical.yaml +++ b/configs/downloader/input4mips/bc_historical.yaml @@ -1,3 +1,4 @@ +project: "input4mips" models: ["NorESM2-LM"] downloader_kwargs: variables: ["BC"] diff --git a/configs/downloader/input4mips/bc_ssp.yaml b/configs/downloader/input4mips/bc_ssp.yaml index 1608f92..165c962 100644 --- a/configs/downloader/input4mips/bc_ssp.yaml +++ b/configs/downloader/input4mips/bc_ssp.yaml @@ -1,3 +1,4 @@ +project: "input4mips" models: ["NorESM2-LM"] 
downloader_kwargs: variables: ["BC"] diff --git a/configs/downloader/input4mips/ch4_historical.yaml b/configs/downloader/input4mips/ch4_historical.yaml index 1e5ad30..054ec1f 100644 --- a/configs/downloader/input4mips/ch4_historical.yaml +++ b/configs/downloader/input4mips/ch4_historical.yaml @@ -1,3 +1,4 @@ +project: "input4mips" models: ["NorESM2-LM"] downloader_kwargs: variables: ["CH4"] diff --git a/configs/downloader/input4mips/ch4_ssp.yaml b/configs/downloader/input4mips/ch4_ssp.yaml index 4282283..b023507 100644 --- a/configs/downloader/input4mips/ch4_ssp.yaml +++ b/configs/downloader/input4mips/ch4_ssp.yaml @@ -1,3 +1,4 @@ +project: "input4mips" models: ["NorESM2-LM"] downloader_kwargs: variables: ["CH4"] diff --git a/configs/downloader/input4mips/co2_historical.yaml b/configs/downloader/input4mips/co2_historical.yaml index c415eb8..2936afd 100644 --- a/configs/downloader/input4mips/co2_historical.yaml +++ b/configs/downloader/input4mips/co2_historical.yaml @@ -1,3 +1,4 @@ +project: "input4mips" models: ["NorESM2-LM"] downloader_kwargs: variables: ["CO2"] diff --git a/configs/downloader/input4mips/co2_ssp.yaml b/configs/downloader/input4mips/co2_ssp.yaml index 2a166d4..1054fbb 100644 --- a/configs/downloader/input4mips/co2_ssp.yaml +++ b/configs/downloader/input4mips/co2_ssp.yaml @@ -1,3 +1,4 @@ +project: "input4mips" models: ["NorESM2-LM"] downloader_kwargs: variables: ["C02"] diff --git a/configs/downloader/input4mips/so2_historical.yaml b/configs/downloader/input4mips/so2_historical.yaml index f639cd3..75cfa41 100644 --- a/configs/downloader/input4mips/so2_historical.yaml +++ b/configs/downloader/input4mips/so2_historical.yaml @@ -1,3 +1,4 @@ +project: "input4mips" models: ["NorESM2-LM"] downloader_kwargs: variables: ["SO2"] diff --git a/configs/downloader/input4mips/so2_ssp.yaml b/configs/downloader/input4mips/so2_ssp.yaml index 56c5b5b..74826f0 100644 --- a/configs/downloader/input4mips/so2_ssp.yaml +++ b/configs/downloader/input4mips/so2_ssp.yaml @@ 
-1,3 +1,4 @@ +project: "input4mips" models: ["NorESM2-LM"] downloader_kwargs: variables: ["SO2"] From 114eb1080e25e81ef62e45cd0ea463e48ea3afd5 Mon Sep 17 00:00:00 2001 From: liellnima Date: Sat, 26 Oct 2024 02:13:13 +0200 Subject: [PATCH 04/38] remove unused esm_constants --- .../download/constants/esm_constants.py | 82 ------------------- 1 file changed, 82 deletions(-) delete mode 100644 climateset/download/constants/esm_constants.py diff --git a/climateset/download/constants/esm_constants.py b/climateset/download/constants/esm_constants.py deleted file mode 100644 index 780fbbe..0000000 --- a/climateset/download/constants/esm_constants.py +++ /dev/null @@ -1,82 +0,0 @@ -### DOWNLOADER PARAMS ########################################################## - -# these resolutions are stored in RESOLUTION -TEMP_RES = 0 -VERT_RES = 0 -LON_RES = 0 -LAT_RES = 0 - -# resolution of the end-data-product -RESOLUTION = (TEMP_RES, VERT_RES, LON_RES, LAT_RES) - -# list of years that are considered for the data -YEARS = [0] - - -# distinction not necessary for the mother as we are first just providing data not designing the loader yet, and a -# lookup table to check where to downloda what from anyway -# # variables used as input for the climate model -# IN_VARS = [] -# -# # predicted / target variables of the climate model -# OUT_VARS = [] -# # suggestion charlie -# VARS = ["nan"] -# # Julia: Birth has three steps: downloading, preprocessing, creating the different resolutions -# # and we already need to distinct between in_vars and out_vars for that - - -CO2 = ["CO2", "CO2_em_anthro", "CO2_em_openburning", "CO2_em_AIR_anthro"] -BC = ["BC", "BC_em_anthro", "BC_em_openburning", "BC_em_AIR_anthro"] -CH4 = ["CH4", "CH4_em_anthro", "CH4_em_openburning", "CH4_em_AIR_anthro"] -SO2 = ["SO2", "SO2_em_anthro", "SO2_em_openburning", "SO2_em_AIR_anthro"] - -IN_VARS = CO2 + BC + CH4 + SO2 -OUT_VARS = ["pr", "tas"] - -VARS = IN_VARS + OUT_VARS - -# scenarios -SCENARIOS = ["historical", 
"ssp126", "ssp245", "ssp370", "ssp585"] -ADDITIONAL_SCENARIOS = ["hist-aer", "hist-GHG", "piControl", "ssp370-lowNTCF"] - -# model -MODELS = ["nan"] - -# number of ensemble members to be considered -NUM_ENSEMBLE = 1 - -# which type of grid -GRID = "grid" - -### RAW PROCESSER PARAMS ####################################################### -# you will see after downloading - -### RESOLUTION PROCESSER PARAMS ################################################ - -# THIS must be moved somewhere else, because it's not static -# tuple of "means" of preprocesser for each variable, e.g. -# [("CO2", "mean"), ["CH4", "median"] -CHOSEN_AGGREGATIONS = [ - "MeanAggregation", - "MinAggregation", - "MaxAggregation", - "InstAggregation", -] -# TODO communicate to other persons which data structure etc you use here -CHOSEN_INTERPOLATIONS = {"nan"} -# TODO create a fixed list for all vars: which aggregation and interpolation - -### ALL PARAMS IN DICT ######################################################### -CORE_PARAMS = { - "models": MODELS, - "scenarios": SCENARIOS, - "years": YEARS, - "in_vars": IN_VARS, - "out_vars": OUT_VARS, - "vars": VARS, - "resolutions": RESOLUTION, - "grid": GRID, - "aggregations": CHOSEN_AGGREGATIONS, - "interpolations": CHOSEN_INTERPOLATIONS, -} From 47f0c78024b08654f64ad7236a1dcf32aecb42e2 Mon Sep 17 00:00:00 2001 From: liellnima Date: Sat, 26 Oct 2024 02:14:16 +0200 Subject: [PATCH 05/38] add new constant files for each esgf project type --- .../{esgf_server.py => cmip6_constants.py} | 300 +++++++++--------- .../download/constants/cmip6plus_constants.py | 26 ++ .../constants/input4mips_constants.py | 1 + 3 files changed, 169 insertions(+), 158 deletions(-) rename climateset/download/constants/{esgf_server.py => cmip6_constants.py} (91%) create mode 100644 climateset/download/constants/cmip6plus_constants.py create mode 100644 climateset/download/constants/input4mips_constants.py diff --git a/climateset/download/constants/esgf_server.py 
b/climateset/download/constants/cmip6_constants.py similarity index 91% rename from climateset/download/constants/esgf_server.py rename to climateset/download/constants/cmip6_constants.py index a8d09ef..ed4606c 100644 --- a/climateset/download/constants/esgf_server.py +++ b/climateset/download/constants/cmip6_constants.py @@ -1,119 +1,146 @@ -# Supported Model sources +# The values here have been retrieved from here: +# https://wcrp-cmip.org/cmip-data-access/ + +# This entry node link is automatically changing to other nodes NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" -MODEL_SOURCES = { - "ACCESS-CM2": { - "node_link": NODE_LINK, - }, - "ACCESS-ESM1-5": { - "node_link": NODE_LINK, - }, - "AWI-CM-1-1-MR": { - "node_link": NODE_LINK, - }, - "BCC-CSM2-MR": { - "node_link": NODE_LINK, - }, - "CAMS-CSM1-0": { - "node_link": NODE_LINK, - }, - "CAS-ESM2-0": { - "node_link": NODE_LINK, - }, - "CESM2": { - "node_link": NODE_LINK, - }, - "CESM2-WACCM": { - "node_link": NODE_LINK, - }, - "CMCC-CM2-SR5": { - "node_link": NODE_LINK, - }, - "CMCC-ESM2": { - "node_link": NODE_LINK, - }, - "CNRM-CM6-1": { - "node_link": NODE_LINK, - }, - "CNRM-CM6-1-HR": { - "node_link": NODE_LINK, - }, - "CNRM-ESM2-1": { - "node_link": NODE_LINK, - }, - "EC-Earth3": { - "node_link": NODE_LINK, - }, - "EC-Earth3-Veg": { - "node_link": NODE_LINK, - }, - "EC-Earth3-Veg-LR": { - "node_link": NODE_LINK, - }, - "FGOALS-f3-L": { - "node_link": NODE_LINK, - }, - "FGOALS-g3": { - "node_link": NODE_LINK, - }, - "GFDL-ESM4": { - "node_link": NODE_LINK, - }, - "GISS-E2-1-G": { - "node_link": NODE_LINK, - }, - "GISS-E2-1-H": { - "node_link": NODE_LINK, - }, - "GISS-E2-2-G": { - "node_link": NODE_LINK, - }, - "IITM-ESM": { - "node_link": NODE_LINK, - }, - "INM-CM4-8": { - "node_link": NODE_LINK, - }, - "INM-CM5-0": { - "node_link": NODE_LINK, - }, - "IPSL-CM6A-LR": { - "node_link": NODE_LINK, - }, - "KACE-1-0-G": { - "node_link": NODE_LINK, - }, - "MCM-UA-1-0": { - "node_link": NODE_LINK, - }, - 
"MIROC6": { - "node_link": NODE_LINK, - }, - # there are several centers for the MPI models - consider choosing another one if needed - "MPI-ESM1-2-HR": { - "node_link": NODE_LINK, - }, - "MPI-ESM1-2-LR": { - "node_link": NODE_LINK, - }, - "MRI-ESM2-0": { - "node_link": NODE_LINK, - }, - "NorESM2-LM": { - "node_link": "https://esgf-data.dkrz.de/esg-search", - }, - "NorESM2-MM": { - "node_link": "https://esgf-data.dkrz.de/esg-search", - }, - "TaiESM1": { - "node_link": NODE_LINK, - }, - # there are several centers for the UKESM models - consider choosing another one if needed - "UKESM1-0-LL": { - "node_link": NODE_LINK, - }, - # "NorESM2-LM": {"node_link": "https://esgf-data.dkrz.de/esg-search", "center": "NCC"}, - # "CanESM5" : {"node_link": NODE_LINK, "center": "CCCma"} -} + +# Supported Model sources + +MODEL_SOURCES = [ + "4AOP-v1-5", + "ACCESS-CM2", + "ACCESS-ESM1-5", + "ACCESS-OM2", + "ACCESS-OM2-025", + "ARTS-2-3", + "AWI-CM-1-1-HR", + "AWI-CM-1-1-LR", + "AWI-CM-1-1-MR", + "AWI-ESM-1-1-LR", + "AWI-ESM-2-1-LR", + "BCC-CSM2-HR", + "BCC-CSM2-MR", + "BCC-ESM1", + "CAM-MPAS-HR", + "CAM-MPAS-LR", + "CAMS-CSM1-0", + "CanESM5", + "CanESM5-1", + "CanESM5-CanOE", + "CAS-ESM2-0", + "CESM1-1-CAM5-CMIP5", + "CESM1-CAM5-SE-HR", + "CESM1-CAM5-SE-LR", + "CESM1-WACCM-SC", + "CESM2", + "CESM2-FV2", + "CESM2-WACCM", + "CESM2-WACCM-FV2", + "CIESM", + "CMCC-CM2-HR4", + "CMCC-CM2-SR5", + "CMCC-CM2-VHR4", + "CMCC-ESM2", + "CNRM-CM6-1", + "CNRM-CM6-1-HR", + "CNRM-ESM2-1", + "E3SM-1-0", + "E3SM-1-1", + "E3SM-1-1-ECA", + "E3SM-2-0", + "EC-Earth3", + "EC-Earth3-AerChem", + "EC-Earth3-CC", + "EC-Earth3-GrIS", + "EC-Earth3-HR", + "EC-Earth3-LR", + "EC-Earth3-Veg", + "EC-Earth3-Veg-LR", + "EC-Earth3P", + "EC-Earth3P-HR", + "EC-Earth3P-VHR", + "ECMWF-IFS-HR", + "ECMWF-IFS-LR", + "ECMWF-IFS-MR", + "FGOALS-f3-H", + "FGOALS-f3-L", + "FGOALS-g3", + "FIO-ESM-2-0", + "GFDL-AM4", + "GFDL-CM4", + "GFDL-CM4C192", + "GFDL-ESM2M", + "GFDL-ESM4", + "GFDL-GRTCODE", + "GFDL-OM4p5B", + "GFDL-RFM-DISORT", 
+ "GISS-E2-1-G", + "GISS-E2-1-G-CC", + "GISS-E2-1-H", + "GISS-E2-2-G", + "GISS-E2-2-H", + "GISS-E3-G", + "HadGEM3-GC31-HH", + "HadGEM3-GC31-HM", + "HadGEM3-GC31-LL", + "HadGEM3-GC31-LM", + "HadGEM3-GC31-MH", + "HadGEM3-GC31-MM", + "HiRAM-SIT-HR", + "HiRAM-SIT-LR", + "ICON-ESM-LR", + "IITM-ESM", + "INM-CM4-8", + "INM-CM5-0", + "INM-CM5-H", + "IPSL-CM5A2-INCA", + "IPSL-CM6A-ATM-HR", + "IPSL-CM6A-ATM-ICO-HR", + "IPSL-CM6A-ATM-ICO-LR", + "IPSL-CM6A-ATM-ICO-MR", + "IPSL-CM6A-ATM-ICO-VHR", + "IPSL-CM6A-ATM-LR-REPROBUS", + "IPSL-CM6A-LR", + "IPSL-CM6A-LR-INCA", + "IPSL-CM6A-MR1", + "KACE-1-0-G", + "KIOST-ESM", + "LBLRTM-12-8", + "MCM-UA-1-0", + "MIROC-ES2H", + "MIROC-ES2H-NB", + "MIROC-ES2L", + "MIROC6", + "MPI-ESM-1-2-HAM", + "MPI-ESM1-2-HR", + "MPI-ESM1-2-LR", + "MPI-ESM1-2-XR", + "MRI-AGCM3-2-H", + "MRI-AGCM3-2-S", + "MRI-ESM2-0", + "NESM3", + "NICAM16-7S", + "NICAM16-8S", + "NICAM16-9S", + "NorCPM1", + "NorESM1-F", + "NorESM2-LM", + "NorESM2-MH", + "RRTMG-LW-4-91", + "RRTMG-SW-4-02", + "RTE-RRTMGP-181204", + "SAM0-UNICON", + "TaiESM1", + "TaiESM1-TIMCOM", + "TaiESM1-TIMCOM2", + "UKESM1-0-LL", + "UKESM1-1-LL", + "UKESM1-ice-LL", + "E3SM-2-0-NARRM", + "E3SM-2-1", + "EC-Earth3-ESM-1", + "PCMDI-test-1-0", +] VAR_SOURCE_LOOKUP = { "model": [ @@ -1274,49 +1301,6 @@ ], } -SUPPORTED_EXPERIMENTS = [ - "ssp585", - "ssp370-lowNTCF", - "ssp370", - "ssp245", - "ssp126", - "piControl", - "piClim-spAer-anthro", - "piClim-spAer-aer", - "piClim-lu", - "piClim-histnat", - "piClim-histghg", - "piClim-histall", - "piClim-histaer", - "piClim-ghg", - "piClim-control", - "piClim-anthro", - "piClim-aer", - "piClim-N2O", - "piClim-CH4", - "piClim-4xCO2", - "piClim-2xss", - "piClim-2xdust", - "piClim-2xVOC", - "piClim-2xDMS", - "pdSST-piArcSIC", - "pdSST-pdSIC", - "pdSST-futArcSIC", - "midHolocene", - "lig127k", - "historical", - "histSST-piNTCF", - "histSST-piAer", - "histSST", - "hist-spAer-all", - "hist-piNTCF", - "hist-piAer", - "hist-nat", - "hist-aer", - "hist-GHG", - "amip", -] - 
SUPPORTED_EXPERIMENTS = [ "hist-1950HC", "lfmip-pdLC", diff --git a/climateset/download/constants/cmip6plus_constants.py b/climateset/download/constants/cmip6plus_constants.py new file mode 100644 index 0000000..1f20a8b --- /dev/null +++ b/climateset/download/constants/cmip6plus_constants.py @@ -0,0 +1,26 @@ +NODE_LINK = "http://esgf-data2.llnl.gov" + +MODEL_SOURCES = [ + "HasGEM3-GC31-LL", +] + +VAR_SOURCE_LOOKUP = { + "model": [ + "areacella", + "mrsofc", + ], + "raw": [ + "areacella", + "mrsofc", + ], +} + +SUPPORTED_EXPERIMENTS = [ + "hist-lu", + "hist-piAer", + "hist-piVolc", +] + +GRIDDING_HIERACHY = ["gn"] + +RES_TO_CHUNKSIZE = {"year": 1, "mon": 12, "6hr": 1460, "3hr": 2920, "day": 364} diff --git a/climateset/download/constants/input4mips_constants.py b/climateset/download/constants/input4mips_constants.py new file mode 100644 index 0000000..119b98c --- /dev/null +++ b/climateset/download/constants/input4mips_constants.py @@ -0,0 +1 @@ +NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" From aa89ff6c4a70360975b7a122fbb994b594003c8e Mon Sep 17 00:00:00 2001 From: liellnima Date: Sat, 26 Oct 2024 02:15:32 +0200 Subject: [PATCH 06/38] remove get_selected_scenario as it is too restricting --- climateset/download/utils.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/climateset/download/utils.py b/climateset/download/utils.py index 4adc8e7..a5632d8 100644 --- a/climateset/download/utils.py +++ b/climateset/download/utils.py @@ -8,7 +8,7 @@ import pandas as pd import xarray as xr -from climateset import APP_ROOT, RAW_DATA +from climateset import RAW_DATA from climateset.utils import create_logger LOGGER = create_logger(__name__) @@ -273,21 +273,3 @@ def _handle_base_search_constraints(ctx, default_frequency, default_grid_label): if frequency: ctx = ctx.constrain(frequency=frequency) return ctx - - -def get_select_model_scenarios(path_to_file: Union[str, pathlib.Path] = None) -> pd.DataFrame: - """ - This function returns 
a dataframe based on input Json file. - - Args: - path_to_file: Path to Json file - - Returns: - Dataframe - """ - if not path_to_file: - path_to_file = APP_ROOT / "download/constants/selected_scenariosMIPs.json" - if isinstance(path_to_file, str): - path_to_file = pathlib.Path(path_to_file) - selected_scenarios = pd.read_json(path_to_file, orient="records") - return selected_scenarios From a61dc2a9b41bc0741620f7480ff50b52e06a4cc6 Mon Sep 17 00:00:00 2001 From: liellnima Date: Sat, 26 Oct 2024 02:20:13 +0200 Subject: [PATCH 07/38] remove restricting funcs, extend to broader model set, extend to broader experiment/scenario set, remove some defaults that result in unituitive results, add some failure points where needed, add naive approach for scenario handling --- climateset/download/downloader.py | 122 ++++++++++++++++++++++-------- 1 file changed, 92 insertions(+), 30 deletions(-) diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index e6220f2..0a75a7e 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -4,17 +4,15 @@ from pyesgf.search import SearchConnection +import climateset.download.constants.cmip6_constants as cmip6_constants +import climateset.download.constants.cmip6plus_constants as cmip6plus_constants +import climateset.download.constants.input4mips_constants as input4mips_constants from climateset import RAW_DATA from climateset.download.constants.data_constants import ( EMISSIONS_ENDINGS, META_ENDINGS_PRC, META_ENDINGS_SHAR, ) -from climateset.download.constants.esgf_server import ( - MODEL_SOURCES, - SUPPORTED_EXPERIMENTS, - VAR_SOURCE_LOOKUP, -) from climateset.download.utils import ( _handle_base_search_constraints, download_metadata_variable, @@ -37,6 +35,7 @@ class Downloader: # TODO Fix complexity issue def __init__( # noqa: C901 self, + project: str = "CMIP6", # default as in ClimateBench model: Union[str, None] = "NorESM2-LM", # default as in ClimateBench experiments: 
list[str] = None, # sub-selection of ClimateBench default variables: list[str] = None, @@ -53,6 +52,8 @@ def __init__( # noqa: C901 Init method for the Downloader. Args: + project (str): Which categorie the data belongs to. Can be: CMIP6, CMIP6Plus, E3SM, input4mips, obs4mips, and more. + To date, only CMIP6, and input4mips are supported. model: Model ID from which output should be downloaded. A list of all supported model ids can be found in parameters.constants.MODEL_SOURCES. Model data only. experiments: List of simulations from which data should be downloaded. Model data only. @@ -67,18 +68,11 @@ def __init__( # noqa: C901 """ # Args init self.logger = logger + # init global variables depending on project type + self._init_globs(project) + self.project: str = project self.model: str = model self.model_node_link: str = "" - if experiments is None: - experiments = [ - "historical", - "ssp370", - "hist-GHG", - "piControl", - "ssp434", - "ssp126", - ] - # TODO: have a list of supported experiments before trying to look for them on the node # to reduce computation cost self.experiments: list[str] = experiments self.raw_vars: list[str] = [] @@ -93,14 +87,52 @@ def __init__( # noqa: C901 self.download_metafiles: bool = download_metafiles self.download_biomass_burning: bool = download_biomassburning self.use_plain_emission_vars: bool = use_plain_emission_vars + self.model_node_link = self.NODE_LINK # if max ensemble member number is too large --> we are relying on the server to complain? 
+ self._check_desired_params() + # Args processing self._handle_variables( variables=variables, ) - self._handle_model_params() + # self._handle_model_params() + + # TODO we need to make the downloader an abstract parent class + # each project needs its own constant file + downloader function, the rest stays the same + # this function should not be done this way, this is the first naive approach + def _init_globs(self, project: str): + """Load globs depending on project.""" + if project == "CMIP6": + self.MODEL_SOURCES = cmip6_constants.MODEL_SOURCES + self.SUPPORTED_EXPERIMENTS = cmip6_constants.SUPPORTED_EXPERIMENTS + self.VAR_SOURCE_LOOKUP = cmip6_constants.VAR_SOURCE_LOOKUP + self.NODE_LINK = cmip6_constants.NODE_LINK + elif project == "input4mips": + self.NODE_LINK = input4mips_constants.NODE_LINK + elif project == "CMIP6Plus": + self.MODEL_SOURCES = cmip6plus_constants.MODEL_SOURCES + self.SUPPORTED_EXPERIMENTS = cmip6plus_constants.SUPPORTED_EXPERIMENTS + self.VAR_SOURCE_LOOKUP = cmip6plus_constants.VAR_SOURCE_LOOKUP + self.NODE_LINK = cmip6plus_constants.NODE_LINK + else: + self.logger.info(f"Project {project} has not been implemented in the Downloader yet.") + raise NotImplementedError(f"Project {project} has not been implemented in the downloader.") + + def _check_desired_params(self): + """Check if the desired params exist.""" + # check model + if self.model not in self.MODEL_SOURCES: + self.logger.info(f"WARNING: Model {self.model} unknown.") + raise ValueError( + f"Model {self.model} is not in the list of supported models. 
Consider adding manually to esgf_server.py" + ) + + # check experiments + # loop over experiments and check for each experiment in the list + + # check variables def _handle_variables(self, variables: list[str]): self._generate_variables(variables=variables) @@ -113,15 +145,18 @@ def _handle_variables(self, variables: list[str]): self.logger.info(f"Downloading meta vars:\n\t{self.meta_vars_percentage}\n\t{self.meta_vars_share}") def _handle_model_params(self): + # check if model, variable, and experiment exists try: - self.model_node_link = MODEL_SOURCES[self.model]["node_link"] + self.model_node_link = self.MODEL_SOURCES[self.model]["node_link"] except KeyError: if self.model is not None: self.logger.info(f"WARNING: Model {self.model} unknown.") - # TODO cause an error here and exit (move to next download item) - self.model = next(iter(MODEL_SOURCES)) - self.logger.info(f"Using : {self.model}") - self.model_node_link = MODEL_SOURCES[self.model]["node_link"] + raise ValueError( + "Model {} is not in the list of supported models. 
Consider adding manually to esgf_server.py".format( + self.model + ) + ) + self.model_node_link = self.MODEL_SOURCES[self.model]["node_link"] def _generate_plain_emission_vars(self): if self.use_plain_emission_vars: @@ -175,7 +210,7 @@ def _generate_variables(self, variables: list[str]): variables = [v.replace(" ", "_").replace("-", "_") for v in variables] self.logger.info(f"Cleaned variables : {variables}") for v in variables: - t = get_keys_from_value(d=VAR_SOURCE_LOOKUP, val=v, logger=self.logger) + t = get_keys_from_value(d=self.VAR_SOURCE_LOOKUP, val=v, logger=self.logger) if t == "model": self.model_vars.append(v) elif t == "raw": @@ -227,10 +262,16 @@ def download_from_model_single_var( # noqa: C901 ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) - # CONTINUE DEBUGGING HERE - variants = list(ctx.facet_counts["variant_label"]) + if len(variants) < 1: + self.logger.info( + "No items were found for this request. Please check on the esgf server if the combination of your model/scenarios/variables exists." + ) + raise ValueError( + "Downloader did not find any items on esgf for your request with: Project {project}, Experiment {experiment}, Model {self.model}, Variable {variable}." + ) + self.logger.info(f"Available variants : {variants}\n") self.logger.info(f"Length : {len(variants)}") @@ -390,13 +431,13 @@ def download_from_model(self): for variable in self.model_vars: self.logger.info(f"Downloading data for variable: {variable}") for experiment in self.experiments: - if experiment in SUPPORTED_EXPERIMENTS: + if experiment in self.SUPPORTED_EXPERIMENTS: self.logger.info(f"Downloading data for experiment: {experiment}") - self.download_from_model_single_var(variable=variable, experiment=experiment) + self.download_from_model_single_var(project=self.project, variable=variable, experiment=experiment) else: self.logger.info( f"Chosen experiment {experiment} not supported. All supported experiments: " - f"{SUPPORTED_EXPERIMENTS}. 
Skipping." + f"{self.SUPPORTED_EXPERIMENTS}. Skipping." ) def download_raw_input(self): @@ -448,6 +489,13 @@ def download_from_config_file(config: str, logger: logging.Logger = LOGGER): if isinstance(config, str): config = pathlib.Path(config) config = get_yaml_config(config) + try: + project = config["project"] + except KeyError as e: + logger.warning( + f"No project specified. Assuming CMIP6 data should be downloaded. Caught the following exception: {e}" + ) + project = "CMIP6" try: models = config["models"] except KeyError as e: @@ -456,8 +504,22 @@ def download_from_config_file(config: str, logger: logging.Logger = LOGGER): models = [None] downloader_kwargs = config["downloader_kwargs"] logger.info(f"Downloader kwargs : {downloader_kwargs}") - for m in models: - downloader = Downloader(model=m, **downloader_kwargs, logger=logger) + + # TODO @Francis I think we need to implement an abstract Downloader. + # Each project should get its own Downloader: CMIPXDownloader, input4mipsDownloader, etc. + # These classes only need to implement the different downloading functions needed for their specific datasets. + # Here, I am just doing the naive way with the stuff we have right now: + if project == "input4mips": + downloader = Downloader(project=project, model=models, **downloader_kwargs, logger=logger) downloader.download_raw_input() - if m is not None: + elif (project == "CMIP6") or (project == "CMIP6Plus"): + for m in models: + downloader = Downloader(project=project, model=m, **downloader_kwargs, logger=logger) downloader.download_from_model() + else: + logger.info( + f"Project {project} is not supported. Consider implementing your own downloader childclass for this." + ) + raise ValueError( + f"Project {project} is not supported. Currently supported projects are: CMIP6, CMIP6Plus, input4mips." 
+ ) From b8680336d48afeae19280101d79d94b2a2eecfcc Mon Sep 17 00:00:00 2001 From: liellnima Date: Tue, 19 Nov 2024 20:23:26 +0100 Subject: [PATCH 08/38] move constants into constant classes, and collect them in a dict in esgf.py. split up raw and model vars. remove unused constants. --- climateset/download/constants/cmip6.py | 934 +++++++++++++++++ climateset/download/constants/cmip6plus.py | 26 + .../download/constants/cmip6plus_constants.py | 26 - .../download/constants/data_constants.py | 18 - climateset/download/constants/esgf.py | 17 + .../{cmip6_constants.py => input4mips.py} | 978 +----------------- .../constants/input4mips_constants.py | 1 - 7 files changed, 1019 insertions(+), 981 deletions(-) create mode 100644 climateset/download/constants/cmip6.py create mode 100644 climateset/download/constants/cmip6plus.py delete mode 100644 climateset/download/constants/cmip6plus_constants.py delete mode 100644 climateset/download/constants/data_constants.py create mode 100644 climateset/download/constants/esgf.py rename climateset/download/constants/{cmip6_constants.py => input4mips.py} (51%) delete mode 100644 climateset/download/constants/input4mips_constants.py diff --git a/climateset/download/constants/cmip6.py b/climateset/download/constants/cmip6.py new file mode 100644 index 0000000..e32276c --- /dev/null +++ b/climateset/download/constants/cmip6.py @@ -0,0 +1,934 @@ +# TODO remove raw variables from here +class Cmip6Constants: + """ + Attributes: + NODE_LINK (str): Where the data can be accessed + MODEL_SOURCES (List): Identifiers for supported climate models + VAR_SOURCE_LOOKUP (Dict>): model and raw variables + SUPPORTED_EXPERIMENTS (list): experiments of climate models (runs) that are supported + """ + + # The values here have been retrieved from here: + # https://wcrp-cmip.org/cmip-data-access/ + + # This entry node link is automatically changing to other nodes + NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" + + # Supported Model sources + 
MODEL_SOURCES = [ + "4AOP-v1-5", + "ACCESS-CM2", + "ACCESS-ESM1-5", + "ACCESS-OM2", + "ACCESS-OM2-025", + "ARTS-2-3", + "AWI-CM-1-1-HR", + "AWI-CM-1-1-LR", + "AWI-CM-1-1-MR", + "AWI-ESM-1-1-LR", + "AWI-ESM-2-1-LR", + "BCC-CSM2-HR", + "BCC-CSM2-MR", + "BCC-ESM1", + "CAM-MPAS-HR", + "CAM-MPAS-LR", + "CAMS-CSM1-0", + "CanESM5", + "CanESM5-1", + "CanESM5-CanOE", + "CAS-ESM2-0", + "CESM1-1-CAM5-CMIP5", + "CESM1-CAM5-SE-HR", + "CESM1-CAM5-SE-LR", + "CESM1-WACCM-SC", + "CESM2", + "CESM2-FV2", + "CESM2-WACCM", + "CESM2-WACCM-FV2", + "CIESM", + "CMCC-CM2-HR4", + "CMCC-CM2-SR5", + "CMCC-CM2-VHR4", + "CMCC-ESM2", + "CNRM-CM6-1", + "CNRM-CM6-1-HR", + "CNRM-ESM2-1", + "E3SM-1-0", + "E3SM-1-1", + "E3SM-1-1-ECA", + "E3SM-2-0", + "EC-Earth3", + "EC-Earth3-AerChem", + "EC-Earth3-CC", + "EC-Earth3-GrIS", + "EC-Earth3-HR", + "EC-Earth3-LR", + "EC-Earth3-Veg", + "EC-Earth3-Veg-LR", + "EC-Earth3P", + "EC-Earth3P-HR", + "EC-Earth3P-VHR", + "ECMWF-IFS-HR", + "ECMWF-IFS-LR", + "ECMWF-IFS-MR", + "FGOALS-f3-H", + "FGOALS-f3-L", + "FGOALS-g3", + "FIO-ESM-2-0", + "GFDL-AM4", + "GFDL-CM4", + "GFDL-CM4C192", + "GFDL-ESM2M", + "GFDL-ESM4", + "GFDL-GRTCODE", + "GFDL-OM4p5B", + "GFDL-RFM-DISORT", + "GISS-E2-1-G", + "GISS-E2-1-G-CC", + "GISS-E2-1-H", + "GISS-E2-2-G", + "GISS-E2-2-H", + "GISS-E3-G", + "HadGEM3-GC31-HH", + "HadGEM3-GC31-HM", + "HadGEM3-GC31-LL", + "HadGEM3-GC31-LM", + "HadGEM3-GC31-MH", + "HadGEM3-GC31-MM", + "HiRAM-SIT-HR", + "HiRAM-SIT-LR", + "ICON-ESM-LR", + "IITM-ESM", + "INM-CM4-8", + "INM-CM5-0", + "INM-CM5-H", + "IPSL-CM5A2-INCA", + "IPSL-CM6A-ATM-HR", + "IPSL-CM6A-ATM-ICO-HR", + "IPSL-CM6A-ATM-ICO-LR", + "IPSL-CM6A-ATM-ICO-MR", + "IPSL-CM6A-ATM-ICO-VHR", + "IPSL-CM6A-ATM-LR-REPROBUS", + "IPSL-CM6A-LR", + "IPSL-CM6A-LR-INCA", + "IPSL-CM6A-MR1", + "KACE-1-0-G", + "KIOST-ESM", + "LBLRTM-12-8", + "MCM-UA-1-0", + "MIROC-ES2H", + "MIROC-ES2H-NB", + "MIROC-ES2L", + "MIROC6", + "MPI-ESM-1-2-HAM", + "MPI-ESM1-2-HR", + "MPI-ESM1-2-LR", + "MPI-ESM1-2-XR", + "MRI-AGCM3-2-H", + 
"MRI-AGCM3-2-S", + "MRI-ESM2-0", + "NESM3", + "NICAM16-7S", + "NICAM16-8S", + "NICAM16-9S", + "NorCPM1", + "NorESM1-F", + "NorESM2-LM", + "NorESM2-MH", + "RRTMG-LW-4-91", + "RRTMG-SW-4-02", + "RTE-RRTMGP-181204", + "SAM0-UNICON", + "TaiESM1", + "TaiESM1-TIMCOM", + "TaiESM1-TIMCOM2", + "UKESM1-0-LL", + "UKESM1-1-LL", + "UKESM1-ice-LL", + "E3SM-2-0-NARRM", + "E3SM-2-1", + "EC-Earth3-ESM-1", + "PCMDI-test-1-0", + ] + + VAR_SOURCE_LOOKUP = [ + "ztp", + "zsatcalc", + "zsatarag", + "zostoga", + "zossq", + "zos", + "zoocos", + "zooc", + "zo2min", + "zhalfo", + "zg500", + "zg1000", + "zg100", + "zg10", + "zg", + "zfullo", + "wtd", + "wo", + "wmo", + "wfonocorr", + "wfo", + "wetss", + "wetso4", + "wetso2", + "wetlandFrac", + "wetlandCH4", + "wetbc", + "wap500", + "wap", + "vsf", + "volo", + "volcello", + "vo", + "vmo", + "vegHeight", + "va", + "uo", + "umo", + "ua", + "tslsi", + "tsl", + "ts", + "tran", + "tossq", + "tosga", + "tos", + "tob", + "thkcello", + "thetaot700", + "thetaot300", + "thetaot2000", + "thetaot", + "thetaoga", + "thetao", + "tgs", + "tcs", + "tauvo", + "tauv", + "tauuo", + "tauu", + "tasmin", + "tasmax", + "tas", + "talkos", + "talknat", + "talk", + "ta850", + "ta700", + "ta500", + "ta", + "t20d", + "spco2", + "sossq", + "sosga", + "sos", + "sootsn", + "somint", + "soga", + "sob", + "so2", + "so", + "snw", + "sndmasswindrif", + "sndmasssnf", + "sndmasssi", + "sndmassmelt", + "snd", + "snc", + "sivols", + "sivoln", + "sivol", + "siv", + "siu", + "sitimefrac", + "sithick", + "sitemptop", + "sitempsnic", + "sitempbot", + "sistryubot", + "sistrydtop", + "sistrxubot", + "sistrxdtop", + "sispeed", + "sisnthick", + "sisnmass", + "sisnhc", + "sisnconc", + "sirdgthick", + "sirdgconc", + "sipr", + "sios", + "simpconc", + "simass", + "siitdthick", + "siitdsnthick", + "siitdsnconc", + "siitdconc", + "sihc", + "siforcetilty", + "siforcetiltx", + "siforceintstry", + "siforceintstrx", + "siforcecorioly", + "siforcecoriolx", + "siflswutop", + "siflswdtop", + 
"siflswdbot", + "siflsensupbot", + "siflsenstop", + "sifllwutop", + "sifllwdtop", + "sifllatstop", + "siflfwdrain", + "siflfwbot", + "siflcondtop", + "siflcondbot", + "sifb", + "siextents", + "siextentn", + "sidmasstrany", + "sidmasstranx", + "sidmassth", + "sidmasssi", + "sidmassmelttop", + "sidmassmeltbot", + "sidmasslat", + "sidmassgrowthwat", + "sidmassgrowthbot", + "sidmassevapsubl", + "sidmassdyn", + "sidivvel", + "sidconcth", + "sidconcdyn", + "siconc", + "sicompstren", + "siarean", + "siage", + "si", + "sftof", + "sftlf", + "sftgif", + "sfdsi", + "sfcWind", + "sf6", + "rtmt", + "rsutcsaf", + "rsutcs", + "rsutaf", + "rsut", + "rsuscs", + "rsus", + "rsntds", + "rsdt", + "rsdsdiff", + "rsdscs", + "rsds", + "rlutcsaf", + "rlutcs", + "rlutaf", + "rlut", + "rlus", + "rldscs", + "rlds", + "rh", + "reffclwtop", + "ra", + "rMaint", + "rGrowth", + "qgwr", + "pso", + "psl", + "ps", + "prw", + "prveg", + "prsn", + "prra", + "prc", + "pr", + "ppos", + "pp", + "popos", + "pop", + "ponos", + "pon", + "po4os", + "po4", + "phynos", + "phyn", + "phyfeos", + "phyfe", + "phyc", + "phos", + "phnat", + "phalf", + "ph", + "pfull", + "pctisccp", + "pbo", + "orog", + "opottempmint", + "oh", + "od870aer", + "od550ss", + "od550so4", + "od550oa", + "od550lt1aer", + "od550dust", + "od550csaer", + "od550bc", + "od550aerh2o", + "od550aer", + "od440aer", + "obvfsq", + "o3", + "o2satos", + "o2sat", + "o2os", + "o2min", + "o2", + "nppWood", + "nppRoot", + "nppLeaf", + "npp", + "no3os", + "no3", + "nep", + "nbp", + "nVeg", + "nStem", + "nSoil", + "nRoot", + "nMineralNO3", + "nMineralNH4", + "nMineral", + "nLitter", + "nLeaf", + "nLand", + "n2oglobal", + "msftmzmpa", + "msftmz", + "msftmrhompa", + "msftmrho", + "msftbarot", + "mrtws", + "mrsos", + "mrsol", + "mrso", + "mrsll", + "mrsfl", + "mrros", + "mrrob", + "mrro", + "mrlso", + "mrfso", + "mmrss", + "mmrsoa", + "mmrso4", + "mmrpm2p5", + "mmrpm1", + "mmroa", + "mmrdust", + "mmrbc", + "mmraerh2o", + "mlotstsq", + "mlotstmin", + "mlotstmax", 
+ "mlotst", + "mfo", + "masso", + "masscello", + "lwsnl", + "lwp", + "loadss", + "loaddust", + "lai", + "isop", + "intpp", + "intpoc", + "intpn2", + "intdoc", + "intdic", + "huss", + "hus", + "hurs", + "hur", + "hfy", + "hfx", + "hfss", + "hfls", + "hfds", + "hfbasinpmdiff", + "hfbasinpmadv", + "hfbasinpadv", + "hfbasin", + "gpp", + "fsitherm", + "froc", + "frn", + "friver", + "fric", + "frfe", + "ficeberg", + "fgo2", + "fgdms", + "fgco2nat", + "fgco2", + "fVegLitterSenescence", + "fVegLitterMortality", + "fVegLitter", + "fNup", + "fNnetmin", + "fNloss", + "fNleach", + "fNgasNonFire", + "fNgasFire", + "fNgas", + "fNfert", + "fNdep", + "fNProduct", + "fNOx", + "fN2O", + "fLuc", + "fLitterFire", + "fHarvestToProduct", + "fHarvest", + "fFireNat", + "fFire", + "fDeforestToProduct", + "fBNF", + "evspsblveg", + "evspsblsoi", + "evspsbl", + "evs", + "esn", + "es", + "epsi100", + "epp100", + "epn100", + "epfe100", + "epcalc100", + "epc100", + "emivoc", + "emiss", + "emiso4", + "emiso2", + "emioa", + "emiisop", + "emidust", + "emidms", + "emibvoc", + "emibc", + "ec", + "dryso4", + "dryso2", + "drybc", + "dpco2", + "dmsos", + "dms", + "dmlt", + "dissocos", + "dissoc", + "dissicos", + "dissicnat", + "dissic", + "dfeos", + "dfe", + "detocos", + "detoc", + "deptho", + "cod", + "co3satcalcos", + "co3satcalc", + "co3sataragos", + "co3satarag", + "co3os", + "co3nat", + "co3", + "co2mass", + "co2", + "clwvi", + "clwmodis", + "clw", + "cltmodis", + "cltisccp", + "cltcalipso", + "clt", + "clmcalipso", + "cllcalipso", + "clivi", + "climodis", + "cli", + "clhcalipso", + "cl", + "chlos", + "chl", + "chepsoa", + "ch4global", + "cfc12global", + "cfc12", + "cfc11global", + "cfc11", + "cdnc", + "cct", + "ccn", + "ccb", + "calcos", + "calc", + "cWood", + "cVeg", + "cStem", + "cSoilSlow", + "cSoilMedium", + "cSoilFast", + "cSoilAbove1m", + "cSoil", + "cRoot", + "cMisc", + "cLitter", + "cLeaf", + "cLand", + "cCwd", + "bsios", + "bsi", + "bldep", + "bfeos", + "bfe", + "basin", + "ares", + 
"areacello", + "areacella", + "albisccp", + "airmass", + "agessc", + "abs550aer", + ] + + SUPPORTED_EXPERIMENTS = [ + "hist-1950HC", + "lfmip-pdLC", + "ssp126", + "ssp126-ssp370Lu", + "ssp245", + "ssp370", + "ssp370-lowNTCF", + "ssp370-ssp126Lu", + "ssp370SST", + "ssp370SST-lowCH4", + "ssp370SST-lowNTCF", + "ssp370SST-ssp126Lu", + "ssp585", + "hist-resAMO", + "hist-resIPO", + "historical-ext", + "lfmip-initLC", + "lfmip-pdLC-cruNcep", + "lfmip-pdLC-princeton", + "lfmip-pdLC-wfdei", + "lfmip-rmLC", + "lfmip-rmLC-cruNcep", + "lfmip-rmLC-princeton", + "lfmip-rmLC-wfdei", + "pa-futAntSIC", + "pa-futArcSIC", + "pa-pdSIC", + "pa-piAntSIC", + "pa-piArcSIC", + "ssp119", + "ssp370pdSST", + "ssp370SST-lowAer", + "ssp370SST-lowBC", + "ssp370SST-lowO3", + "ssp434", + "ssp460", + "dcppC-atl-pacemaker", + "dcppC-pac-pacemaker", + "pa-futAntSIC-ext", + "pa-futArcSIC-ext", + "pa-pdSIC-ext", + "ssp370-lowNTCFCH4", + "ssp370SST-lowNTCFCH4", + "volc-cluster-21C", + "yr2010CO2", + "dcppA-historical-niff", + "1pctCO2", + "1pctCO2-bgc", + "abrupt-4xCO2", + "dcppC-amv-neg", + "dcppC-amv-pos", + "dcppC-atl-control", + "dcppC-ipv-neg", + "dcppC-ipv-pos", + "dcppC-pac-control", + "deforest-globe", + "faf-heat", + "faf-heat-NA50pct", + "faf-stress", + "faf-water", + "G1", + "hist-aer", + "hist-GHG", + "hist-nat", + "hist-noLu", + "hist-piNTCF", + "hist-spAer-all", + "histSST", + "histSST-noLu", + "histSST-piCH4", + "histSST-piNTCF", + "piClim-4xCO2", + "piClim-aer", + "piClim-anthro", + "piClim-CH4", + "piClim-control", + "piClim-ghg", + "piClim-HC", + "piClim-lu", + "piClim-NTCF", + "volc-long-eq", + "volc-pinatubo-full", + "volc-pinatubo-strat", + "volc-pinatubo-surf", + "1pctCO2-rad", + "1pctCO2Ndep", + "1pctCO2Ndep-bgc", + "abrupt-0p5xCO2", + "abrupt-2xCO2", + "abrupt-solm4p", + "abrupt-solp4p", + "dcppC-amv-ExTrop-neg", + "dcppC-amv-ExTrop-pos", + "dcppC-amv-Trop-neg", + "dcppC-amv-Trop-pos", + "dcppC-ipv-NexTrop-neg", + "dcppC-ipv-NexTrop-pos", + "faf-all", + "faf-antwater-stress", + 
"faf-heat-NA0pct", + "faf-passiveheat", + "hist-bgc", + "hist-piAer", + "hist-spAer-aer", + "hist-stratO3", + "histSST-piAer", + "histSST-piN2O", + "histSST-piO3", + "piClim-2xdust", + "piClim-2xss", + "piClim-BC", + "piClim-histaer", + "piClim-histall", + "piClim-histghg", + "piClim-histnat", + "piClim-N2O", + "piClim-O3", + "piClim-spAer-aer", + "piClim-spAer-anthro", + "piClim-spAer-histaer", + "piClim-spAer-histall", + "piSST-4xCO2-solar", + "volc-cluster-ctrl", + "volc-long-hlN", + "hist-all-aer2", + "hist-all-nat2", + "hist-CO2", + "hist-sol", + "hist-totalO3", + "hist-volc", + "piClim-2xDMS", + "piClim-2xfire", + "piClim-2xNOx", + "piClim-2xVOC", + "piClim-NH3", + "piClim-NOx", + "piClim-OC", + "piClim-SO2", + "piClim-VOC", + "volc-long-hlS", + "histSST-1950HC", + "esm-ssp585", + "esm-ssp585-ssp126Lu", + "esm-hist-ext", + "ssp534-over-bgc", + "ssp585-bgc", + "esm-1pct-brch-1000PgC", + "esm-1pct-brch-750PgC", + "esm-1pct-brch-2000PgC", + "esm-hist", + "esm-pi-cdr-pulse", + "esm-pi-CO2pulse", + "esm-1pctCO2", + "esm-bell-750PgC", + "esm-bell-1000PgC", + "esm-bell-2000PgC", + "esm-yr2010CO2-control", + "1pctCO2-4xext", + "1pctCO2-cdr", + "esm-ssp534-over", + "esm-ssp585-ocn-alk", + "esm-ssp585ext", + "esm-ssp585-ocn-alk-stop", + "esm-ssp585-ssp126Lu-ext", + "esm-yr2010CO2-cdr-pulse", + "esm-yr2010CO2-CO2pulse", + "esm-yr2010CO2-noemit", + "amip", + "amip-4xCO2", + "amip-future4K", + "amip-hist", + "amip-p4K", + "aqua-4xCO2", + "aqua-control", + "aqua-p4K", + "highresSST-present", + "ism-ctrl-std", + "ism-pdControl-std", + "ism-piControl-self", + "land-hist", + "land-hist-altStartYear", + "land-noLu", + "land-ssp126", + "land-ssp585", + "lgm", + "lig127k", + "midHolocene", + "midPliocene-eoi400", + "omip1", + "past1000", + "piControl-withism", + "rad-irf", + "a4SST", + "a4SSTice", + "a4SSTice-4xCO2", + "amip-a4SST-4xCO2", + "amip-lfmip-pdLC", + "amip-lfmip-pObs", + "amip-lfmip-rmLC", + "amip-lwoff", + "amip-m4K", + "amip-p4K-lwoff", + "amip-piForcing", + 
"aqua-control-lwoff", + "aqua-p4K-lwoff", + "dcppA-assim", + "esm-piControl-spinup", + "land-cClim", + "land-cCO2", + "land-crop-grass", + "land-crop-noFert", + "land-crop-noIrrig", + "land-crop-noIrrigFert", + "land-hist-altLu1", + "land-hist-altLu2", + "land-hist-cruNcep", + "land-hist-princeton", + "land-hist-wfdei", + "land-noFire", + "land-noPasture", + "land-noShiftCultivate", + "land-noWoodHarv", + "land-ssp434", + "omip1-spunup", + "past1000-solaronly", + "past1000-volconly", + "piControl-spinup", + "piControl-spinup-cmip5", + "piSST", + "piSST-4xCO2", + "piSST-4xCO2-rad", + "piSST-pxK", + "spinup-1950", + "amip-hld", + "amip-TIP", + "amip-TIP-nosh", + "control-slab", + "dcppC-atl-spg", + "esm-past1000", + "ism-lig127k-std", + "omip2", + "omip2-spunup", + "past2k", + "esm-piControl", + "historical", + "historical-cmip5", + "hist-aer-cmip5", + "hist-GHG-cmip5", + "hist-nat-cmip5", + "piControl", + "piControl-cmip5", + "ssp245-aer", + "ssp245-cov-strgreen", + "ssp245-covid", + "ssp245-cov-aer", + "ssp245-cov-fossil", + "ssp245-cov-GHG", + "ssp245-cov-modgreen", + "ssp245-GHG", + "ssp245-nat", + "ssp245-stratO3", + "dcppA-hindcast", + "dcppB-forecast", + "dcppC-forecast-addPinatubo", + "dcppC-hindcast-noPinatubo", + "dcppC-hindcast-noAgung", + "dcppC-hindcast-noElChichon", + "dcppC-forecast-addAgung", + "dcppC-forecast-addElChichon", + "dcppA-hindcast-niff", + "futureSST-4xCO2-solar", + "G6solar", + "G6sulfur", + "G6SST1", + "G7cirrus", + "G7SST1-cirrus", + "ssp534-over", + "G6SST2-solar", + "G6SST2-sulfur", + "G7SST2-cirrus", + "control-1950", + "hist-1950", + "highres-future", + "highresSST-4xCO2", + "highresSST-future", + "highresSST-LAI", + "highresSST-p4K", + "highresSST-smoothed", + "1pctCO2to4x-withism", + "historical-withism", + "ism-1pctCO2to4x-self", + "ism-historical-self", + "ism-1pctCO2to4x-std", + "ism-historical-std", + "ism-asmb-std", + "ism-bsmb-std", + "ism-amip-std", + "ism-ssp585-self", + "ism-ssp585-std", + "ssp585-withism", + 
"pdSST-futAntSIC", + "pdSST-futArcSIC", + "pdSST-pdSIC", + "pdSST-piAntSIC", + "pdSST-piArcSIC", + "piSST-pdSIC", + "futSST-pdSIC", + "piSST-piSIC", + "amip-climSIC", + "amip-climSST", + "modelSST-futArcSIC", + "modelSST-pdSIC", + "pdSST-futArcSICSIT", + "pdSST-futBKSeasSIC", + "pdSST-futOkhotskSIC", + "pdSST-pdSICSIT", + "rcp26-cmip5", + "rcp45-cmip5", + "rcp60-cmip5", + "rcp85-cmip5", + "volc-cluster-mill", + "volc-pinatubo-slab", + ] diff --git a/climateset/download/constants/cmip6plus.py b/climateset/download/constants/cmip6plus.py new file mode 100644 index 0000000..a883cd1 --- /dev/null +++ b/climateset/download/constants/cmip6plus.py @@ -0,0 +1,26 @@ +# TODO remove raw variables from here +class Cmip6plusConstants: + """ + Attributes: + NODE_LINK (str): Where the data can be accessed + MODEL_SOURCES (List): Identifiers for supported climate models + VAR_SOURCE_LOOKUP (Dict>): model and raw variables + SUPPORTED_EXPERIMENTS (list): experiments of climate models (runs) that are supported + """ + + NODE_LINK = "http://esgf-data2.llnl.gov" + + MODEL_SOURCES = [ + "HasGEM3-GC31-LL", + ] + + VAR_SOURCE_LOOKUP = [ + "areacella", + "mrsofc", + ] + + SUPPORTED_EXPERIMENTS = [ + "hist-lu", + "hist-piAer", + "hist-piVolc", + ] diff --git a/climateset/download/constants/cmip6plus_constants.py b/climateset/download/constants/cmip6plus_constants.py deleted file mode 100644 index 1f20a8b..0000000 --- a/climateset/download/constants/cmip6plus_constants.py +++ /dev/null @@ -1,26 +0,0 @@ -NODE_LINK = "http://esgf-data2.llnl.gov" - -MODEL_SOURCES = [ - "HasGEM3-GC31-LL", -] - -VAR_SOURCE_LOOKUP = { - "model": [ - "areacella", - "mrsofc", - ], - "raw": [ - "areacella", - "mrsofc", - ], -} - -SUPPORTED_EXPERIMENTS = [ - "hist-lu", - "hist-piAer", - "hist-piVolc", -] - -GRIDDING_HIERACHY = ["gn"] - -RES_TO_CHUNKSIZE = {"year": 1, "mon": 12, "6hr": 1460, "3hr": 2920, "day": 364} diff --git a/climateset/download/constants/data_constants.py 
b/climateset/download/constants/data_constants.py deleted file mode 100644 index ff89eb1..0000000 --- a/climateset/download/constants/data_constants.py +++ /dev/null @@ -1,18 +0,0 @@ -EMISSIONS_ENDINGS = ["_em_openburning", "_em_anthro", "_em_AIR_anthro"] - -META_ENDINGS_PRC = [ - "_percentage_AGRI", - "_percentage_BORF", - "_percentage_DEFO", - "_percentage_PEAT", - "_percentage_SAVA", - "_percentage_TEMF", -] -META_ENDINGS_SHAR = ["_openburning_share"] - -LON_LAT_TO_GRID_SIZE = { - (720, 360): "25_km", - (360, 720): "25_km", - (96, 144): "250_km", - (144, 96): "250_km", -} diff --git a/climateset/download/constants/esgf.py b/climateset/download/constants/esgf.py new file mode 100644 index 0000000..879d946 --- /dev/null +++ b/climateset/download/constants/esgf.py @@ -0,0 +1,17 @@ +from .cmip6 import Cmip6Constants +from .cmip6plus import Cmip6plusConstants +from .input4mips import Input4mipsConstants + +# constant classes for esgf projects implemented here +# add your own esgf project for downloading to download/constants/ and add the constant class to the dict and lists here +ESGF_PROJECTS = { + "CMIP6": Cmip6Constants, + "CMIP6Plus": Cmip6plusConstants, + "input4MIPs": Input4mipsConstants, +} + +# datasets that provide inputs to climate models +ESGF_RAW_INPUT_LIST = ["input4MIPs"] + +# datasets that provide outputs from climate models +ESGF_MODEL_OUTPUT_LIST = ["CMIP6", "CMIP6Plus"] diff --git a/climateset/download/constants/cmip6_constants.py b/climateset/download/constants/input4mips.py similarity index 51% rename from climateset/download/constants/cmip6_constants.py rename to climateset/download/constants/input4mips.py index ed4606c..ba78c50 100644 --- a/climateset/download/constants/cmip6_constants.py +++ b/climateset/download/constants/input4mips.py @@ -1,605 +1,48 @@ -# The values here have been retrieved from here: -# https://wcrp-cmip.org/cmip-data-access/ +# TODO add VAR_SOURCE_LOOKUP with raw variables +# TODO add supported experiments +# TODO do we 
really need emission endings, meta_endings_prc, meta_endings_shar?? how is this used so far? +class Input4mipsConstants: + """ + Attributes: + NODE_LINK (str): Where the data can be accessed + EMISSION_ENDINGS (List): + META_ENDINGS_PRC (List): + META_ENDINGS_SHARE (List): + VAR_SOURCE_LOOKUP (Dict>): model and raw variables + """ -# This entry node link is automatically changing to other nodes -NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" + NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" -# Supported Model sources + EMISSIONS_ENDINGS = ["_em_openburning", "_em_anthro", "_em_AIR_anthro"] -MODEL_SOURCES = [ - "4AOP-v1-5", - "ACCESS-CM2", - "ACCESS-ESM1-5", - "ACCESS-OM2", - "ACCESS-OM2-025", - "ARTS-2-3", - "AWI-CM-1-1-HR", - "AWI-CM-1-1-LR", - "AWI-CM-1-1-MR", - "AWI-ESM-1-1-LR", - "AWI-ESM-2-1-LR", - "BCC-CSM2-HR", - "BCC-CSM2-MR", - "BCC-ESM1", - "CAM-MPAS-HR", - "CAM-MPAS-LR", - "CAMS-CSM1-0", - "CanESM5", - "CanESM5-1", - "CanESM5-CanOE", - "CAS-ESM2-0", - "CESM1-1-CAM5-CMIP5", - "CESM1-CAM5-SE-HR", - "CESM1-CAM5-SE-LR", - "CESM1-WACCM-SC", - "CESM2", - "CESM2-FV2", - "CESM2-WACCM", - "CESM2-WACCM-FV2", - "CIESM", - "CMCC-CM2-HR4", - "CMCC-CM2-SR5", - "CMCC-CM2-VHR4", - "CMCC-ESM2", - "CNRM-CM6-1", - "CNRM-CM6-1-HR", - "CNRM-ESM2-1", - "E3SM-1-0", - "E3SM-1-1", - "E3SM-1-1-ECA", - "E3SM-2-0", - "EC-Earth3", - "EC-Earth3-AerChem", - "EC-Earth3-CC", - "EC-Earth3-GrIS", - "EC-Earth3-HR", - "EC-Earth3-LR", - "EC-Earth3-Veg", - "EC-Earth3-Veg-LR", - "EC-Earth3P", - "EC-Earth3P-HR", - "EC-Earth3P-VHR", - "ECMWF-IFS-HR", - "ECMWF-IFS-LR", - "ECMWF-IFS-MR", - "FGOALS-f3-H", - "FGOALS-f3-L", - "FGOALS-g3", - "FIO-ESM-2-0", - "GFDL-AM4", - "GFDL-CM4", - "GFDL-CM4C192", - "GFDL-ESM2M", - "GFDL-ESM4", - "GFDL-GRTCODE", - "GFDL-OM4p5B", - "GFDL-RFM-DISORT", - "GISS-E2-1-G", - "GISS-E2-1-G-CC", - "GISS-E2-1-H", - "GISS-E2-2-G", - "GISS-E2-2-H", - "GISS-E3-G", - "HadGEM3-GC31-HH", - "HadGEM3-GC31-HM", - "HadGEM3-GC31-LL", - "HadGEM3-GC31-LM", - "HadGEM3-GC31-MH", - 
"HadGEM3-GC31-MM", - "HiRAM-SIT-HR", - "HiRAM-SIT-LR", - "ICON-ESM-LR", - "IITM-ESM", - "INM-CM4-8", - "INM-CM5-0", - "INM-CM5-H", - "IPSL-CM5A2-INCA", - "IPSL-CM6A-ATM-HR", - "IPSL-CM6A-ATM-ICO-HR", - "IPSL-CM6A-ATM-ICO-LR", - "IPSL-CM6A-ATM-ICO-MR", - "IPSL-CM6A-ATM-ICO-VHR", - "IPSL-CM6A-ATM-LR-REPROBUS", - "IPSL-CM6A-LR", - "IPSL-CM6A-LR-INCA", - "IPSL-CM6A-MR1", - "KACE-1-0-G", - "KIOST-ESM", - "LBLRTM-12-8", - "MCM-UA-1-0", - "MIROC-ES2H", - "MIROC-ES2H-NB", - "MIROC-ES2L", - "MIROC6", - "MPI-ESM-1-2-HAM", - "MPI-ESM1-2-HR", - "MPI-ESM1-2-LR", - "MPI-ESM1-2-XR", - "MRI-AGCM3-2-H", - "MRI-AGCM3-2-S", - "MRI-ESM2-0", - "NESM3", - "NICAM16-7S", - "NICAM16-8S", - "NICAM16-9S", - "NorCPM1", - "NorESM1-F", - "NorESM2-LM", - "NorESM2-MH", - "RRTMG-LW-4-91", - "RRTMG-SW-4-02", - "RTE-RRTMGP-181204", - "SAM0-UNICON", - "TaiESM1", - "TaiESM1-TIMCOM", - "TaiESM1-TIMCOM2", - "UKESM1-0-LL", - "UKESM1-1-LL", - "UKESM1-ice-LL", - "E3SM-2-0-NARRM", - "E3SM-2-1", - "EC-Earth3-ESM-1", - "PCMDI-test-1-0", -] + META_ENDINGS_PRC = [ + "_percentage_AGRI", + "_percentage_BORF", + "_percentage_DEFO", + "_percentage_PEAT", + "_percentage_SAVA", + "_percentage_TEMF", + ] -VAR_SOURCE_LOOKUP = { - "model": [ - "ztp", - "zsatcalc", - "zsatarag", - "zostoga", - "zossq", - "zos", - "zoocos", - "zooc", - "zo2min", - "zhalfo", - "zg500", - "zg1000", - "zg100", - "zg10", - "zg", - "zfullo", - "wtd", - "wo", - "wmo", - "wfonocorr", - "wfo", - "wetss", - "wetso4", - "wetso2", - "wetlandFrac", - "wetlandCH4", - "wetbc", - "wap500", - "wap", - "vsf", - "volo", - "volcello", - "vo", - "vmo", - "vegHeight", - "va", - "uo", - "umo", - "ua", - "tslsi", - "tsl", - "ts", - "tran", - "tossq", - "tosga", - "tos", - "tob", - "thkcello", - "thetaot700", - "thetaot300", - "thetaot2000", - "thetaot", - "thetaoga", - "thetao", - "tgs", - "tcs", - "tauvo", - "tauv", - "tauuo", - "tauu", - "tasmin", - "tasmax", - "tas", - "talkos", - "talknat", - "talk", - "ta850", - "ta700", - "ta500", - "ta", - "t20d", - 
"spco2", - "sossq", - "sosga", - "sos", - "sootsn", - "somint", - "soga", - "sob", - "so2", - "so", - "snw", - "sndmasswindrif", - "sndmasssnf", - "sndmasssi", - "sndmassmelt", - "snd", - "snc", - "sivols", - "sivoln", - "sivol", - "siv", - "siu", - "sitimefrac", - "sithick", - "sitemptop", - "sitempsnic", - "sitempbot", - "sistryubot", - "sistrydtop", - "sistrxubot", - "sistrxdtop", - "sispeed", - "sisnthick", - "sisnmass", - "sisnhc", - "sisnconc", - "sirdgthick", - "sirdgconc", - "sipr", - "sios", - "simpconc", - "simass", - "siitdthick", - "siitdsnthick", - "siitdsnconc", - "siitdconc", - "sihc", - "siforcetilty", - "siforcetiltx", - "siforceintstry", - "siforceintstrx", - "siforcecorioly", - "siforcecoriolx", - "siflswutop", - "siflswdtop", - "siflswdbot", - "siflsensupbot", - "siflsenstop", - "sifllwutop", - "sifllwdtop", - "sifllatstop", - "siflfwdrain", - "siflfwbot", - "siflcondtop", - "siflcondbot", - "sifb", - "siextents", - "siextentn", - "sidmasstrany", - "sidmasstranx", - "sidmassth", - "sidmasssi", - "sidmassmelttop", - "sidmassmeltbot", - "sidmasslat", - "sidmassgrowthwat", - "sidmassgrowthbot", - "sidmassevapsubl", - "sidmassdyn", - "sidivvel", - "sidconcth", - "sidconcdyn", - "siconc", - "sicompstren", - "siarean", - "siage", - "si", - "sftof", - "sftlf", - "sftgif", - "sfdsi", - "sfcWind", - "sf6", - "rtmt", - "rsutcsaf", - "rsutcs", - "rsutaf", - "rsut", - "rsuscs", - "rsus", - "rsntds", - "rsdt", - "rsdsdiff", - "rsdscs", - "rsds", - "rlutcsaf", - "rlutcs", - "rlutaf", - "rlut", - "rlus", - "rldscs", - "rlds", - "rh", - "reffclwtop", - "ra", - "rMaint", - "rGrowth", - "qgwr", - "pso", - "psl", - "ps", - "prw", - "prveg", - "prsn", - "prra", - "prc", - "pr", - "ppos", - "pp", - "popos", - "pop", - "ponos", - "pon", - "po4os", - "po4", - "phynos", - "phyn", - "phyfeos", - "phyfe", - "phyc", - "phos", - "phnat", - "phalf", - "ph", - "pfull", - "pctisccp", - "pbo", - "orog", - "opottempmint", - "oh", - "od870aer", - "od550ss", - "od550so4", - 
"od550oa", - "od550lt1aer", - "od550dust", - "od550csaer", - "od550bc", - "od550aerh2o", - "od550aer", - "od440aer", - "obvfsq", - "o3", - "o2satos", - "o2sat", - "o2os", - "o2min", - "o2", - "nppWood", - "nppRoot", - "nppLeaf", - "npp", - "no3os", - "no3", - "nep", - "nbp", - "nVeg", - "nStem", - "nSoil", - "nRoot", - "nMineralNO3", - "nMineralNH4", - "nMineral", - "nLitter", - "nLeaf", - "nLand", - "n2oglobal", - "msftmzmpa", - "msftmz", - "msftmrhompa", - "msftmrho", - "msftbarot", - "mrtws", - "mrsos", - "mrsol", - "mrso", - "mrsll", - "mrsfl", - "mrros", - "mrrob", - "mrro", - "mrlso", - "mrfso", - "mmrss", - "mmrsoa", - "mmrso4", - "mmrpm2p5", - "mmrpm1", - "mmroa", - "mmrdust", - "mmrbc", - "mmraerh2o", - "mlotstsq", - "mlotstmin", - "mlotstmax", - "mlotst", - "mfo", - "masso", - "masscello", - "lwsnl", - "lwp", - "loadss", - "loaddust", - "lai", - "isop", - "intpp", - "intpoc", - "intpn2", - "intdoc", - "intdic", - "huss", - "hus", - "hurs", - "hur", - "hfy", - "hfx", - "hfss", - "hfls", - "hfds", - "hfbasinpmdiff", - "hfbasinpmadv", - "hfbasinpadv", - "hfbasin", - "gpp", - "fsitherm", - "froc", - "frn", - "friver", - "fric", - "frfe", - "ficeberg", - "fgo2", - "fgdms", - "fgco2nat", - "fgco2", - "fVegLitterSenescence", - "fVegLitterMortality", - "fVegLitter", - "fNup", - "fNnetmin", - "fNloss", - "fNleach", - "fNgasNonFire", - "fNgasFire", - "fNgas", - "fNfert", - "fNdep", - "fNProduct", - "fNOx", - "fN2O", - "fLuc", - "fLitterFire", - "fHarvestToProduct", - "fHarvest", - "fFireNat", - "fFire", - "fDeforestToProduct", - "fBNF", - "evspsblveg", - "evspsblsoi", - "evspsbl", - "evs", - "esn", - "es", - "epsi100", - "epp100", - "epn100", - "epfe100", - "epcalc100", - "epc100", - "emivoc", - "emiss", - "emiso4", - "emiso2", - "emioa", - "emiisop", - "emidust", - "emidms", - "emibvoc", - "emibc", - "ec", - "dryso4", - "dryso2", - "drybc", - "dpco2", - "dmsos", - "dms", - "dmlt", - "dissocos", - "dissoc", - "dissicos", - "dissicnat", - "dissic", - "dfeos", - 
"dfe", - "detocos", - "detoc", - "deptho", - "cod", - "co3satcalcos", - "co3satcalc", - "co3sataragos", - "co3satarag", - "co3os", - "co3nat", - "co3", - "co2mass", - "co2", - "clwvi", - "clwmodis", - "clw", - "cltmodis", - "cltisccp", - "cltcalipso", - "clt", - "clmcalipso", - "cllcalipso", - "clivi", - "climodis", - "cli", - "clhcalipso", - "cl", - "chlos", - "chl", - "chepsoa", - "ch4global", - "cfc12global", - "cfc12", - "cfc11global", - "cfc11", - "cdnc", - "cct", - "ccn", - "ccb", - "calcos", - "calc", - "cWood", - "cVeg", - "cStem", - "cSoilSlow", - "cSoilMedium", - "cSoilFast", - "cSoilAbove1m", - "cSoil", - "cRoot", - "cMisc", - "cLitter", - "cLeaf", - "cLand", - "cCwd", - "bsios", - "bsi", - "bldep", - "bfeos", - "bfe", - "basin", - "ares", - "areacello", - "areacella", - "albisccp", - "airmass", - "agessc", - "abs550aer", - ], - "raw": [ + META_ENDINGS_SHAR = ["_openburning_share"] + + MIP_ERA = "CMIP6" + + TARGET_MIP = "ScenarioMIP" + + SUPPORTED_EXPERIMENTS = [ + "historical", + "ssp119", + "ssp126", + "ssp245", + "ssp370", + "ssp434", + "ssp460", + "ssp534-over", + "ssp585", + ] + + VAR_SOURCE_LOOKUP = [ "years", "year_weight", "year_fr", @@ -1298,341 +741,4 @@ "BC", "AIR", "AGR", - ], -} - -SUPPORTED_EXPERIMENTS = [ - "hist-1950HC", - "lfmip-pdLC", - "ssp126", - "ssp126-ssp370Lu", - "ssp245", - "ssp370", - "ssp370-lowNTCF", - "ssp370-ssp126Lu", - "ssp370SST", - "ssp370SST-lowCH4", - "ssp370SST-lowNTCF", - "ssp370SST-ssp126Lu", - "ssp585", - "hist-resAMO", - "hist-resIPO", - "historical-ext", - "lfmip-initLC", - "lfmip-pdLC-cruNcep", - "lfmip-pdLC-princeton", - "lfmip-pdLC-wfdei", - "lfmip-rmLC", - "lfmip-rmLC-cruNcep", - "lfmip-rmLC-princeton", - "lfmip-rmLC-wfdei", - "pa-futAntSIC", - "pa-futArcSIC", - "pa-pdSIC", - "pa-piAntSIC", - "pa-piArcSIC", - "ssp119", - "ssp370pdSST", - "ssp370SST-lowAer", - "ssp370SST-lowBC", - "ssp370SST-lowO3", - "ssp434", - "ssp460", - "dcppC-atl-pacemaker", - "dcppC-pac-pacemaker", - "pa-futAntSIC-ext", - 
"pa-futArcSIC-ext", - "pa-pdSIC-ext", - "ssp370-lowNTCFCH4", - "ssp370SST-lowNTCFCH4", - "volc-cluster-21C", - "yr2010CO2", - "dcppA-historical-niff", - "1pctCO2", - "1pctCO2-bgc", - "abrupt-4xCO2", - "dcppC-amv-neg", - "dcppC-amv-pos", - "dcppC-atl-control", - "dcppC-ipv-neg", - "dcppC-ipv-pos", - "dcppC-pac-control", - "deforest-globe", - "faf-heat", - "faf-heat-NA50pct", - "faf-stress", - "faf-water", - "G1", - "hist-aer", - "hist-GHG", - "hist-nat", - "hist-noLu", - "hist-piNTCF", - "hist-spAer-all", - "histSST", - "histSST-noLu", - "histSST-piCH4", - "histSST-piNTCF", - "piClim-4xCO2", - "piClim-aer", - "piClim-anthro", - "piClim-CH4", - "piClim-control", - "piClim-ghg", - "piClim-HC", - "piClim-lu", - "piClim-NTCF", - "volc-long-eq", - "volc-pinatubo-full", - "volc-pinatubo-strat", - "volc-pinatubo-surf", - "1pctCO2-rad", - "1pctCO2Ndep", - "1pctCO2Ndep-bgc", - "abrupt-0p5xCO2", - "abrupt-2xCO2", - "abrupt-solm4p", - "abrupt-solp4p", - "dcppC-amv-ExTrop-neg", - "dcppC-amv-ExTrop-pos", - "dcppC-amv-Trop-neg", - "dcppC-amv-Trop-pos", - "dcppC-ipv-NexTrop-neg", - "dcppC-ipv-NexTrop-pos", - "faf-all", - "faf-antwater-stress", - "faf-heat-NA0pct", - "faf-passiveheat", - "hist-bgc", - "hist-piAer", - "hist-spAer-aer", - "hist-stratO3", - "histSST-piAer", - "histSST-piN2O", - "histSST-piO3", - "piClim-2xdust", - "piClim-2xss", - "piClim-BC", - "piClim-histaer", - "piClim-histall", - "piClim-histghg", - "piClim-histnat", - "piClim-N2O", - "piClim-O3", - "piClim-spAer-aer", - "piClim-spAer-anthro", - "piClim-spAer-histaer", - "piClim-spAer-histall", - "piSST-4xCO2-solar", - "volc-cluster-ctrl", - "volc-long-hlN", - "hist-all-aer2", - "hist-all-nat2", - "hist-CO2", - "hist-sol", - "hist-totalO3", - "hist-volc", - "piClim-2xDMS", - "piClim-2xfire", - "piClim-2xNOx", - "piClim-2xVOC", - "piClim-NH3", - "piClim-NOx", - "piClim-OC", - "piClim-SO2", - "piClim-VOC", - "volc-long-hlS", - "histSST-1950HC", - "esm-ssp585", - "esm-ssp585-ssp126Lu", - "esm-hist-ext", - 
"ssp534-over-bgc", - "ssp585-bgc", - "esm-1pct-brch-1000PgC", - "esm-1pct-brch-750PgC", - "esm-1pct-brch-2000PgC", - "esm-hist", - "esm-pi-cdr-pulse", - "esm-pi-CO2pulse", - "esm-1pctCO2", - "esm-bell-750PgC", - "esm-bell-1000PgC", - "esm-bell-2000PgC", - "esm-yr2010CO2-control", - "1pctCO2-4xext", - "1pctCO2-cdr", - "esm-ssp534-over", - "esm-ssp585-ocn-alk", - "esm-ssp585ext", - "esm-ssp585-ocn-alk-stop", - "esm-ssp585-ssp126Lu-ext", - "esm-yr2010CO2-cdr-pulse", - "esm-yr2010CO2-CO2pulse", - "esm-yr2010CO2-noemit", - "amip", - "amip-4xCO2", - "amip-future4K", - "amip-hist", - "amip-p4K", - "aqua-4xCO2", - "aqua-control", - "aqua-p4K", - "highresSST-present", - "ism-ctrl-std", - "ism-pdControl-std", - "ism-piControl-self", - "land-hist", - "land-hist-altStartYear", - "land-noLu", - "land-ssp126", - "land-ssp585", - "lgm", - "lig127k", - "midHolocene", - "midPliocene-eoi400", - "omip1", - "past1000", - "piControl-withism", - "rad-irf", - "a4SST", - "a4SSTice", - "a4SSTice-4xCO2", - "amip-a4SST-4xCO2", - "amip-lfmip-pdLC", - "amip-lfmip-pObs", - "amip-lfmip-rmLC", - "amip-lwoff", - "amip-m4K", - "amip-p4K-lwoff", - "amip-piForcing", - "aqua-control-lwoff", - "aqua-p4K-lwoff", - "dcppA-assim", - "esm-piControl-spinup", - "land-cClim", - "land-cCO2", - "land-crop-grass", - "land-crop-noFert", - "land-crop-noIrrig", - "land-crop-noIrrigFert", - "land-hist-altLu1", - "land-hist-altLu2", - "land-hist-cruNcep", - "land-hist-princeton", - "land-hist-wfdei", - "land-noFire", - "land-noPasture", - "land-noShiftCultivate", - "land-noWoodHarv", - "land-ssp434", - "omip1-spunup", - "past1000-solaronly", - "past1000-volconly", - "piControl-spinup", - "piControl-spinup-cmip5", - "piSST", - "piSST-4xCO2", - "piSST-4xCO2-rad", - "piSST-pxK", - "spinup-1950", - "amip-hld", - "amip-TIP", - "amip-TIP-nosh", - "control-slab", - "dcppC-atl-spg", - "esm-past1000", - "ism-lig127k-std", - "omip2", - "omip2-spunup", - "past2k", - "esm-piControl", - "historical", - "historical-cmip5", - 
"hist-aer-cmip5", - "hist-GHG-cmip5", - "hist-nat-cmip5", - "piControl", - "piControl-cmip5", - "ssp245-aer", - "ssp245-cov-strgreen", - "ssp245-covid", - "ssp245-cov-aer", - "ssp245-cov-fossil", - "ssp245-cov-GHG", - "ssp245-cov-modgreen", - "ssp245-GHG", - "ssp245-nat", - "ssp245-stratO3", - "dcppA-hindcast", - "dcppB-forecast", - "dcppC-forecast-addPinatubo", - "dcppC-hindcast-noPinatubo", - "dcppC-hindcast-noAgung", - "dcppC-hindcast-noElChichon", - "dcppC-forecast-addAgung", - "dcppC-forecast-addElChichon", - "dcppA-hindcast-niff", - "futureSST-4xCO2-solar", - "G6solar", - "G6sulfur", - "G6SST1", - "G7cirrus", - "G7SST1-cirrus", - "ssp534-over", - "G6SST2-solar", - "G6SST2-sulfur", - "G7SST2-cirrus", - "control-1950", - "hist-1950", - "highres-future", - "highresSST-4xCO2", - "highresSST-future", - "highresSST-LAI", - "highresSST-p4K", - "highresSST-smoothed", - "1pctCO2to4x-withism", - "historical-withism", - "ism-1pctCO2to4x-self", - "ism-historical-self", - "ism-1pctCO2to4x-std", - "ism-historical-std", - "ism-asmb-std", - "ism-bsmb-std", - "ism-amip-std", - "ism-ssp585-self", - "ism-ssp585-std", - "ssp585-withism", - "pdSST-futAntSIC", - "pdSST-futArcSIC", - "pdSST-pdSIC", - "pdSST-piAntSIC", - "pdSST-piArcSIC", - "piSST-pdSIC", - "futSST-pdSIC", - "piSST-piSIC", - "amip-climSIC", - "amip-climSST", - "modelSST-futArcSIC", - "modelSST-pdSIC", - "pdSST-futArcSICSIT", - "pdSST-futBKSeasSIC", - "pdSST-futOkhotskSIC", - "pdSST-pdSICSIT", - "rcp26-cmip5", - "rcp45-cmip5", - "rcp60-cmip5", - "rcp85-cmip5", - "volc-cluster-mill", - "volc-pinatubo-slab", -] -# filepath to var to res Mapping -# VAR_RES_MAPPING_PATH = "/home/charlie/Documents/MILA/causalpaca/data/data_description/mappings/variableid2tableid.csv" - - -GRIDDING_HIERACHY = ["gn"] - -# skip subhr because only diagnostics for specific places -REMOVE_RESOLUTONS = ["suhbr"] # resolution endings to remove e.g. 
kick CFsubhr if this contains 'subhr' - - -RES_TO_CHUNKSIZE = {"year": 1, "mon": 12, "6hr": 1460, "3hr": 2920, "day": 364} + ] diff --git a/climateset/download/constants/input4mips_constants.py b/climateset/download/constants/input4mips_constants.py deleted file mode 100644 index 119b98c..0000000 --- a/climateset/download/constants/input4mips_constants.py +++ /dev/null @@ -1 +0,0 @@ -NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" From bb7b8f1428475f7e3b5a4a9d572f916bc7393a7c Mon Sep 17 00:00:00 2001 From: liellnima Date: Tue, 19 Nov 2024 20:24:20 +0100 Subject: [PATCH 09/38] update configs: move project id to the top --- configs/core_dataset.yaml | 13 +++++++++---- .../awi_sea-ice-thickness_control-1950.yaml | 10 +++++----- configs/downloader/cmip6/canesm_co2_ssp.yaml | 10 +++++----- configs/downloader/cmip6/fgoals_tas_ssp.yaml | 10 +++++----- .../downloader/cmip6/noresm_pr_historical.yaml | 12 ++++++------ configs/downloader/cmip6/noresm_tas_ssp.yaml | 12 ++++++------ .../downloader/cmip6/ukesm_tas_picontrol.yaml | 10 +++++----- configs/downloader/future_usecases/cmip7.yaml | 10 +++++----- configs/downloader/future_usecases/e3sm.yaml | 8 ++++---- .../downloader/future_usecases/obs4mips.yaml | 9 +++++---- .../downloader/input4mips/bc_historical.yaml | 9 ++++----- configs/downloader/input4mips/bc_ssp.yaml | 9 ++++----- .../downloader/input4mips/ch4_historical.yaml | 9 ++++----- configs/downloader/input4mips/ch4_ssp.yaml | 9 ++++----- .../downloader/input4mips/co2_historical.yaml | 9 ++++----- configs/downloader/input4mips/co2_ssp.yaml | 9 ++++----- .../downloader/input4mips/so2_historical.yaml | 9 ++++----- configs/downloader/input4mips/so2_ssp.yaml | 9 ++++----- configs/fgoals_minimal.yaml | 4 ---- configs/minimal_dataset.yaml | 18 +++++++++--------- 20 files changed, 96 insertions(+), 102 deletions(-) delete mode 100644 configs/fgoals_minimal.yaml diff --git a/configs/core_dataset.yaml b/configs/core_dataset.yaml index e6981f5..89cca9c 100644 --- 
a/configs/core_dataset.yaml +++ b/configs/core_dataset.yaml @@ -1,4 +1,9 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["CO2", "BC", "SO2", "CH4", "tas", "pr"] - experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] \ No newline at end of file +CMIP6: + models: ["NorESM2-LM"] + downloader_kwargs: + variables: ["tas", "pr"] + experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] +input4MIPs: + downloader_kwargs: + variables: ["CO2", "BC", "SO2", "CH4"] + experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] diff --git a/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml b/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml index 2c95dc4..81ca1d5 100644 --- a/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml +++ b/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml @@ -1,5 +1,5 @@ -project: "CMIP6" -models: ["AWI-CM-1-1-HR"] -downloader_kwargs: - variables: ["sithick"] - experiments: ["control-1950"] \ No newline at end of file +CMIP6: + models: ["AWI-CM-1-1-HR"] + downloader_kwargs: + variables: ["sithick"] + experiments: ["control-1950"] \ No newline at end of file diff --git a/configs/downloader/cmip6/canesm_co2_ssp.yaml b/configs/downloader/cmip6/canesm_co2_ssp.yaml index affd101..4f33a2c 100644 --- a/configs/downloader/cmip6/canesm_co2_ssp.yaml +++ b/configs/downloader/cmip6/canesm_co2_ssp.yaml @@ -1,5 +1,5 @@ -project: "CMIP6" -models: ["CanESM5"] -downloader_kwargs: - variables: ["co2"] - experiments: ["abrupt-2xCO2"] \ No newline at end of file +CMIP6: + models: ["CanESM5"] + downloader_kwargs: + variables: ["co2"] + experiments: ["abrupt-2xCO2"] \ No newline at end of file diff --git a/configs/downloader/cmip6/fgoals_tas_ssp.yaml b/configs/downloader/cmip6/fgoals_tas_ssp.yaml index 5cbfc5d..b571e01 100644 --- a/configs/downloader/cmip6/fgoals_tas_ssp.yaml +++ b/configs/downloader/cmip6/fgoals_tas_ssp.yaml @@ -1,5 +1,5 @@ -project: "CMIP6" -models: 
["FGOALS-g3"] -downloader_kwargs: - variables: ["tas"] - experiments: ["ssp245"] \ No newline at end of file +CMIP6: + models: ["FGOALS-g3"] + downloader_kwargs: + variables: ["tas"] + experiments: ["ssp245"] \ No newline at end of file diff --git a/configs/downloader/cmip6/noresm_pr_historical.yaml b/configs/downloader/cmip6/noresm_pr_historical.yaml index 39b940d..2bb6f72 100644 --- a/configs/downloader/cmip6/noresm_pr_historical.yaml +++ b/configs/downloader/cmip6/noresm_pr_historical.yaml @@ -1,6 +1,6 @@ -project: "CMIP6" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["pr"] - experiments: ["historical"] - max_ensemble_members: 1 \ No newline at end of file +CMIP6: + models: ["NorESM2-LM"] + downloader_kwargs: + variables: ["pr"] + experiments: ["historical"] + max_ensemble_members: 1 \ No newline at end of file diff --git a/configs/downloader/cmip6/noresm_tas_ssp.yaml b/configs/downloader/cmip6/noresm_tas_ssp.yaml index dde1d45..858eb4f 100644 --- a/configs/downloader/cmip6/noresm_tas_ssp.yaml +++ b/configs/downloader/cmip6/noresm_tas_ssp.yaml @@ -1,6 +1,6 @@ -project: "CMIP6" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["tas"] - experiments: ["ssp245"] - ensemble_members: ["r9i1p1f2", "r8i1p1f2"] \ No newline at end of file +CMIP6: + models: ["NorESM2-LM"] + downloader_kwargs: + variables: ["tas"] + experiments: ["ssp245"] + ensemble_members: ["r9i1p1f2", "r8i1p1f2"] \ No newline at end of file diff --git a/configs/downloader/cmip6/ukesm_tas_picontrol.yaml b/configs/downloader/cmip6/ukesm_tas_picontrol.yaml index dc94d3d..29aaf5c 100644 --- a/configs/downloader/cmip6/ukesm_tas_picontrol.yaml +++ b/configs/downloader/cmip6/ukesm_tas_picontrol.yaml @@ -1,5 +1,5 @@ -project: "CMIP6" -models: ["UKESM1-0-LL"] -downloader_kwargs: - variables: ["tas"] - experiments: ["piControl"] +CMIP6: + models: ["UKESM1-0-LL"] + downloader_kwargs: + variables: ["tas"] + experiments: ["piControl"] diff --git a/configs/downloader/future_usecases/cmip7.yaml 
b/configs/downloader/future_usecases/cmip7.yaml index 69460ed..dfc3dd6 100644 --- a/configs/downloader/future_usecases/cmip7.yaml +++ b/configs/downloader/future_usecases/cmip7.yaml @@ -1,5 +1,5 @@ -project: "CMIP6Plus" -models: ["HasGEM3-GC31-LL"] -downloader_kwargs: - variables: ["mrsofc"] - experiments: ["hist-lu"] \ No newline at end of file +CMIP6Plus: + models: ["HasGEM3-GC31-LL"] + downloader_kwargs: + variables: ["mrsofc"] + experiments: ["hist-lu"] \ No newline at end of file diff --git a/configs/downloader/future_usecases/e3sm.yaml b/configs/downloader/future_usecases/e3sm.yaml index 56d4fc6..d659326 100644 --- a/configs/downloader/future_usecases/e3sm.yaml +++ b/configs/downloader/future_usecases/e3sm.yaml @@ -1,4 +1,4 @@ -models: ["E3SM"] -downloader_kwargs: - variables: ["???"] - experiments: ["ssp585"] \ No newline at end of file +E3SM: + downloader_kwargs: + variables: ["???"] + experiments: ["ssp585"] \ No newline at end of file diff --git a/configs/downloader/future_usecases/obs4mips.yaml b/configs/downloader/future_usecases/obs4mips.yaml index 8f7f853..e671118 100644 --- a/configs/downloader/future_usecases/obs4mips.yaml +++ b/configs/downloader/future_usecases/obs4mips.yaml @@ -1,4 +1,5 @@ -models: ["ESACCI-CLOUD-ATSR2-AATSR-3-0"] -downloader_kwargs: - variables: ["pctCCI"] - experiments: [""] \ No newline at end of file +#ESACCI-CLOUD-ATSR2-AATSR-3-0: +obs4MIPs: + downloader_kwargs: + variables: ["pctCCI"] + experiments: [""] \ No newline at end of file diff --git a/configs/downloader/input4mips/bc_historical.yaml b/configs/downloader/input4mips/bc_historical.yaml index 1ad3cec..74463b3 100644 --- a/configs/downloader/input4mips/bc_historical.yaml +++ b/configs/downloader/input4mips/bc_historical.yaml @@ -1,5 +1,4 @@ -project: "input4mips" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["BC"] - experiments: ["historical"] \ No newline at end of file +input4MIPs: + downloader_kwargs: + variables: ["BC"] + experiments: ["historical"] \ 
No newline at end of file diff --git a/configs/downloader/input4mips/bc_ssp.yaml b/configs/downloader/input4mips/bc_ssp.yaml index 165c962..107573d 100644 --- a/configs/downloader/input4mips/bc_ssp.yaml +++ b/configs/downloader/input4mips/bc_ssp.yaml @@ -1,5 +1,4 @@ -project: "input4mips" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["BC"] - experiments: ["ssp585"] \ No newline at end of file +input4MIPs: + downloader_kwargs: + variables: ["BC"] + experiments: ["ssp585"] \ No newline at end of file diff --git a/configs/downloader/input4mips/ch4_historical.yaml b/configs/downloader/input4mips/ch4_historical.yaml index 054ec1f..f18bfee 100644 --- a/configs/downloader/input4mips/ch4_historical.yaml +++ b/configs/downloader/input4mips/ch4_historical.yaml @@ -1,5 +1,4 @@ -project: "input4mips" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["CH4"] - experiments: ["historical"] \ No newline at end of file +input4MIPs: + downloader_kwargs: + variables: ["CH4"] + experiments: ["historical"] \ No newline at end of file diff --git a/configs/downloader/input4mips/ch4_ssp.yaml b/configs/downloader/input4mips/ch4_ssp.yaml index b023507..21f1f79 100644 --- a/configs/downloader/input4mips/ch4_ssp.yaml +++ b/configs/downloader/input4mips/ch4_ssp.yaml @@ -1,5 +1,4 @@ -project: "input4mips" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["CH4"] - experiments: ["ssp245"] \ No newline at end of file +input4MIPs: + downloader_kwargs: + variables: ["CH4"] + experiments: ["ssp245"] \ No newline at end of file diff --git a/configs/downloader/input4mips/co2_historical.yaml b/configs/downloader/input4mips/co2_historical.yaml index 2936afd..15dbe54 100644 --- a/configs/downloader/input4mips/co2_historical.yaml +++ b/configs/downloader/input4mips/co2_historical.yaml @@ -1,5 +1,4 @@ -project: "input4mips" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["CO2"] - experiments: ["historical"] \ No newline at end of file +input4MIPs: + downloader_kwargs: + 
variables: ["CO2"] + experiments: ["historical"] \ No newline at end of file diff --git a/configs/downloader/input4mips/co2_ssp.yaml b/configs/downloader/input4mips/co2_ssp.yaml index 1054fbb..48d367f 100644 --- a/configs/downloader/input4mips/co2_ssp.yaml +++ b/configs/downloader/input4mips/co2_ssp.yaml @@ -1,5 +1,4 @@ -project: "input4mips" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["C02"] - experiments: ["ssp460"] \ No newline at end of file +input4MIPs: + downloader_kwargs: + variables: ["C02"] + experiments: ["ssp460"] \ No newline at end of file diff --git a/configs/downloader/input4mips/so2_historical.yaml b/configs/downloader/input4mips/so2_historical.yaml index 75cfa41..f2d2e6f 100644 --- a/configs/downloader/input4mips/so2_historical.yaml +++ b/configs/downloader/input4mips/so2_historical.yaml @@ -1,5 +1,4 @@ -project: "input4mips" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["SO2"] - experiments: ["historical"] \ No newline at end of file +input4MIPs: + downloader_kwargs: + variables: ["SO2"] + experiments: ["historical"] \ No newline at end of file diff --git a/configs/downloader/input4mips/so2_ssp.yaml b/configs/downloader/input4mips/so2_ssp.yaml index 74826f0..55436c6 100644 --- a/configs/downloader/input4mips/so2_ssp.yaml +++ b/configs/downloader/input4mips/so2_ssp.yaml @@ -1,5 +1,4 @@ -project: "input4mips" -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["SO2"] - experiments: ["ssp370"] \ No newline at end of file +input4MIPs: + downloader_kwargs: + variables: ["SO2"] + experiments: ["ssp370"] \ No newline at end of file diff --git a/configs/fgoals_minimal.yaml b/configs/fgoals_minimal.yaml deleted file mode 100644 index 145fa02..0000000 --- a/configs/fgoals_minimal.yaml +++ /dev/null @@ -1,4 +0,0 @@ -models: ["FGOALS-g3"] -downloader_kwargs: - variables: ["tas"] - experiments: ["ssp370"] \ No newline at end of file diff --git a/configs/minimal_dataset.yaml b/configs/minimal_dataset.yaml index af4c0c0..742d043 
100644 --- a/configs/minimal_dataset.yaml +++ b/configs/minimal_dataset.yaml @@ -1,9 +1,9 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["tas", "CH4", "CO2"] - experiments: ["historical", "ssp126"] - max_ensemble_members: 1 - overwrite: true - download_biomassburning: false - start_year: 1990 - end_year: 2030 \ No newline at end of file +CMIP6: + models: ["NorESM2-LM"] + downloader_kwargs: + variables: ["tas", "CH4", "CO2"] + experiments: ["historical", "ssp126"] + max_ensemble_members: 1 + overwrite: true + start_year: 1990 + end_year: 2030 \ No newline at end of file From 5a0c38f96c94ef0d1fcbb9178a1d7948c2aa46a8 Mon Sep 17 00:00:00 2001 From: liellnima Date: Tue, 19 Nov 2024 20:27:06 +0100 Subject: [PATCH 10/38] update download_from_config func with new constant and config handling. update attribute handling of class. rewrite some if-else blocks. unify model and raw input vars handling. update constants. rename emission handling funcs. add comments for attributes in downloader class. 
--- climateset/download/downloader.py | 315 ++++++++++++++++-------------- 1 file changed, 172 insertions(+), 143 deletions(-) diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index 0a75a7e..a33ab3c 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -4,14 +4,11 @@ from pyesgf.search import SearchConnection -import climateset.download.constants.cmip6_constants as cmip6_constants -import climateset.download.constants.cmip6plus_constants as cmip6plus_constants -import climateset.download.constants.input4mips_constants as input4mips_constants from climateset import RAW_DATA -from climateset.download.constants.data_constants import ( - EMISSIONS_ENDINGS, - META_ENDINGS_PRC, - META_ENDINGS_SHAR, +from climateset.download.constants.esgf import ( + ESGF_MODEL_OUTPUT_LIST, + ESGF_PROJECTS, + ESGF_RAW_INPUT_LIST, ) from climateset.download.utils import ( _handle_base_search_constraints, @@ -20,7 +17,7 @@ download_raw_input_variable, get_upload_version, ) -from climateset.utils import create_logger, get_keys_from_value, get_yaml_config +from climateset.utils import create_logger, get_yaml_config LOGGER = create_logger(__name__) @@ -32,7 +29,6 @@ class Downloader: It communicates with the esgf nodes to search and download the specified data. """ - # TODO Fix complexity issue def __init__( # noqa: C901 self, project: str = "CMIP6", # default as in ClimateBench @@ -66,128 +62,164 @@ def __init__( # noqa: C901 download_biomassburning: Flag if biomassburning data for input4mips variables should be downloaded. download_metafiles: Flag if metafiles for input4mips variables should be downloaded. """ - # Args init + # Args init for + ## (all) self.logger = logger - # init global variables depending on project type - self._init_globs(project) self.project: str = project + self.data_dir: Union[str, pathlib.Path] = data_dir + self.overwrite: bool = overwrite + ## (climate model output) (e.g. 
cmip6) self.model: str = model - self.model_node_link: str = "" - # to reduce computation cost self.experiments: list[str] = experiments - self.raw_vars: list[str] = [] - self.model_vars: list[str] = [] + self.ensemble_members: list[str] = ensemble_members + self.max_ensemble_members: int = max_ensemble_members + ## (climate model input) (e.g. input4mips) + self.download_metafiles: bool = download_metafiles # TODO infer automatically from vars + self.download_biomass_burning: bool = download_biomassburning # TODO infer automatically from vars + self.use_plain_emission_vars: bool = use_plain_emission_vars # TODO infer automatically from vars + # ESGF project constants for + ## (all) + self.node_link: str = None + self.avail_variables: list[str] = None + self.avail_experiments: list[str] = None + ## (climate model output) + self.avail_models: list[str] = None + ## (climate model input) + self.emissions_endings: list[str] = None + self.meta_endings_prc: list[str] = None + self.meta_endings_share: list[str] = None + self.mip_area: str = None + self.target_mip: str = None + # Attributes that are going to be retrieved / set within this class for + ## (all) + self.vars: list[str] = variables + ## (climate model inputs) self.biomass_vars: list[str] = [] self.meta_vars_percentage: list[str] = [] self.meta_vars_share: list[str] = [] - self.data_dir: Union[str, pathlib.Path] = data_dir - self.ensemble_members: list[str] = ensemble_members - self.max_ensemble_members: int = max_ensemble_members - self.overwrite: bool = overwrite - self.download_metafiles: bool = download_metafiles - self.download_biomass_burning: bool = download_biomassburning - self.use_plain_emission_vars: bool = use_plain_emission_vars - self.model_node_link = self.NODE_LINK + + self._init_project_constants() # if max ensemble member number is too large --> we are relying on the server to complain? 
+ # adapt variables in case of input4mips + if self.project == "input4MIPs": + self._handle_emission_variables( + variables=variables, + ) + self._check_desired_params() - # Args processing - self._handle_variables( - variables=variables, - ) - # self._handle_model_params() - - # TODO we need to make the downloader an abstract parent class - # each project needs its own constant file + downloader function, the rest stays the same - # this function should not be done this way, this is the first naive approach - def _init_globs(self, project: str): - """Load globs depending on project.""" - if project == "CMIP6": - self.MODEL_SOURCES = cmip6_constants.MODEL_SOURCES - self.SUPPORTED_EXPERIMENTS = cmip6_constants.SUPPORTED_EXPERIMENTS - self.VAR_SOURCE_LOOKUP = cmip6_constants.VAR_SOURCE_LOOKUP - self.NODE_LINK = cmip6_constants.NODE_LINK - elif project == "input4mips": - self.NODE_LINK = input4mips_constants.NODE_LINK - elif project == "CMIP6Plus": - self.MODEL_SOURCES = cmip6plus_constants.MODEL_SOURCES - self.SUPPORTED_EXPERIMENTS = cmip6plus_constants.SUPPORTED_EXPERIMENTS - self.VAR_SOURCE_LOOKUP = cmip6plus_constants.VAR_SOURCE_LOOKUP - self.NODE_LINK = cmip6plus_constants.NODE_LINK - else: - self.logger.info(f"Project {project} has not been implemented in the Downloader yet.") - raise NotImplementedError(f"Project {project} has not been implemented in the downloader.") + # @Francis (JK) I am still not happy about this function. Please let me know if I can improve this bit + def _init_project_constants(self): + """Assign/init attributed depending on the project.""" + if self.project not in ESGF_PROJECTS: + self.logger.info(f"Project {self.project} has not been implemented in the Downloader yet.") + raise ValueError( + f"Project {self.project} is not recognized. Consider adding a constant class in download/constants and the esgf.py file." 
+ ) + proj_constants = ESGF_PROJECTS[self.project] + + # init shared constants + self.node_link = proj_constants.NODE_LINK + self.avail_variables = proj_constants.VAR_SOURCE_LOOKUP + self.avail_experiments = proj_constants.SUPPORTED_EXPERIMENTS + + # init climate model output constants + if self.project in ESGF_MODEL_OUTPUT_LIST: + self.avail_models = proj_constants.MODEL_SOURCES + + # init input4mips constants + if self.project == "input4MIPs": + self.emissions_endings = proj_constants.EMISSIONS_ENDINGS + self.meta_endings_prc = proj_constants.META_ENDINGS_PRC + self.meta_endings_share = proj_constants.META_ENDINGS_SHAR + self.mip_area = proj_constants.MIP_ERA + self.target_mip = proj_constants.TARGET_MIP def _check_desired_params(self): """Check if the desired params exist.""" # check model - if self.model not in self.MODEL_SOURCES: - self.logger.info(f"WARNING: Model {self.model} unknown.") + if (self.model is not None) and (self.model not in self.avail_models): + self.logger.warning(f"Model {self.model} unknown.") raise ValueError( - f"Model {self.model} is not in the list of supported models. Consider adding manually to esgf_server.py" + f"Model {self.model} is not in the list of supported models. Check for typos and consider adding it manually." ) # check experiments - # loop over experiments and check for each experiment in the list + for exp in self.experiments: + if exp not in self.avail_experiments: + self.logger.warning(f"Experiment {exp} unknown.") + raise ValueError( + f"Experiment {exp} is not in the list of supported experiments. Check for typos and consider adding it manually." 
+ ) # check variables - - def _handle_variables(self, variables: list[str]): - self._generate_variables(variables=variables) - self._generate_plain_emission_vars() - self.logger.info(f"Raw variables to download: {self.raw_vars}") - self.logger.info(f"Model predicted vars to download: {self.model_vars}") - if self.download_biomass_burning: - self.logger.info(f"Download biomass burning vars: {self.biomass_vars}") - if self.download_metafiles: - self.logger.info(f"Downloading meta vars:\n\t{self.meta_vars_percentage}\n\t{self.meta_vars_share}") - - def _handle_model_params(self): - # check if model, variable, and experiment exists - try: - self.model_node_link = self.MODEL_SOURCES[self.model]["node_link"] - except KeyError: - if self.model is not None: - self.logger.info(f"WARNING: Model {self.model} unknown.") + for var in self.vars: + if var not in self.avail_variables: + self.logger.warning(f"Variable {var} unknown.") raise ValueError( - "Model {} is not in the list of supported models. Consider adding manually to esgf_server.py".format( - self.model - ) + f"Variable {var} is not in the list of supported variables. Check for typos and consider adding it manually." 
+ ) + + # check variables + def _handle_emission_variables(self, variables: list[str]): + if self.project == "input4MIPs": + self.vars = [] + self._generate_raw_emission_vars(variables=variables) + self._generate_plain_emission_vars() + self.logger.info(f"Emission variables to download: {self.vars}") + if self.download_biomass_burning: + self.logger.info(f"Biomass burning vars to download: {self.biomass_vars}") + if self.download_metafiles: + self.logger.info( + f"Meta emission vars to download:\n\t{self.meta_vars_percentage}\n\t{self.meta_vars_share}" ) - self.model_node_link = self.MODEL_SOURCES[self.model]["node_link"] + + # def _check_models(self): + # # check if model, variable, and experiment exists + # try: + # self.node_link = self.avail_models[self.model]["node_link"] + # except KeyError: + # if self.model is not None: + # self.logger.info(f"WARNING: Model {self.model} unknown.") + # raise ValueError( + # "Model {} is not in the list of supported models. Consider adding manually to esgf_server.py".format( + # self.model + # ) + # ) + # self.node_link = self.avail_models[self.model]["node_link"] def _generate_plain_emission_vars(self): if self.use_plain_emission_vars: # plain vars are biomass vars - self.biomass_vars = self.raw_vars + self.biomass_vars = self.vars self.meta_vars_percentage = [ biomass_var + ending for biomass_var in self.biomass_vars if biomass_var != "CO2" - for ending in META_ENDINGS_PRC + for ending in self.meta_endings_prc ] self.meta_vars_share = [ biomass_var + ending for biomass_var in self.biomass_vars if biomass_var != "CO2" - for ending in META_ENDINGS_SHAR + for ending in self.meta_endings_share ] - self.raw_vars = [ - variable + emission_ending for variable in self.raw_vars for emission_ending in EMISSIONS_ENDINGS + self.vars = [ + variable + emission_ending for variable in self.vars for emission_ending in self.emissions_endings ] # be careful with CO2 - if "CO2_em_openburning" in self.raw_vars: - 
self.raw_vars.remove("CO2_em_openburning") + if "CO2_em_openburning" in self.vars: + self.vars.remove("CO2_em_openburning") else: # get plain input4mips vars = biomass vars for historical - self.biomass_vars = list({v.split("_")[0] for v in self.raw_vars}) - # remove biomass vars from normal raw vars list + self.biomass_vars = list({v.split("_")[0] for v in self.vars}) + # remove biomass vars from normal vars list for b in self.biomass_vars: try: - self.raw_vars.remove(b) + self.vars.remove(b) except Exception as error: self.logger.warning(f"Caught the following exception but continuing : {error}") @@ -195,29 +227,23 @@ def _generate_plain_emission_vars(self): biomass_var + ending for biomass_var in self.biomass_vars if biomass_var != "CO2" - for ending in META_ENDINGS_PRC + for ending in self.meta_endings_prc ] self.meta_vars_share = [ biomass_var + ending for biomass_var in self.biomass_vars if biomass_var != "CO2" - for ending in META_ENDINGS_SHAR + for ending in self.meta_endings_share ] - def _generate_variables(self, variables: list[str]): + def _generate_raw_emission_vars(self, variables: list[str]): if variables is None: - variables = ["tas", "pr", "SO2_em_anthro", "BC_em_anthro"] + # variables = ["tas", "pr", "SO2_em_anthro", "BC_em_anthro"] + raise ValueError("No variables have been given to the downloader. Variables must be given for downloader.") variables = [v.replace(" ", "_").replace("-", "_") for v in variables] self.logger.info(f"Cleaned variables : {variables}") for v in variables: - t = get_keys_from_value(d=self.VAR_SOURCE_LOOKUP, val=v, logger=self.logger) - if t == "model": - self.model_vars.append(v) - elif t == "raw": - self.raw_vars.append(v) - - else: - self.logger.info(f"WARNING: unknown source type for var {v}. Not supported. 
Skipping.") + self.vars.append(v) # # Class functions @@ -243,7 +269,7 @@ def download_from_model_single_var( # noqa: C901 preferred_version: data upload version, if 'latest', the newest version will get selected always default_grid_label: default gridding method in which the data is provided """ - conn = SearchConnection(url=self.model_node_link, distrib=False) + conn = SearchConnection(url=self.node_link, distrib=False) facets = ( "project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, " @@ -333,7 +359,7 @@ def download_raw_input_single_var( # noqa: C901 self.logger.info("Using download_raw_input_single_var() function") facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" - conn = SearchConnection(url=self.model_node_link, distrib=False) + conn = SearchConnection(url=self.node_link, distrib=False) ctx = conn.new_context( project=project, @@ -383,7 +409,7 @@ def download_meta_historic_biomassburning_single_var( variable_id = variable.replace("_", "-") variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}" self.logger.info(variable, variable_id, institution_id) - conn = SearchConnection(url=self.model_node_link, distrib=False) + conn = SearchConnection(url=self.node_link, distrib=False) facets = "nominal_resolution,version" ctx = conn.new_context( project=project, @@ -427,18 +453,17 @@ def download_from_model(self): If the constraints cannot be met, per default behaviour for the downloader to select first other available value """ - - for variable in self.model_vars: + for variable in self.vars: self.logger.info(f"Downloading data for variable: {variable}") for experiment in self.experiments: - if experiment in self.SUPPORTED_EXPERIMENTS: - self.logger.info(f"Downloading data for experiment: {experiment}") - self.download_from_model_single_var(project=self.project, variable=variable, experiment=experiment) - else: + if experiment not in self.avail_experiments: 
self.logger.info( f"Chosen experiment {experiment} not supported. All supported experiments: " - f"{self.SUPPORTED_EXPERIMENTS}. Skipping." + f"{self.avail_experiments}. Skipping." ) + continue + self.logger.info(f"Downloading data for experiment: {experiment}") + self.download_from_model_single_var(project=self.project, variable=variable, experiment=experiment) def download_raw_input(self): """ @@ -455,11 +480,10 @@ def download_raw_input(self): If the constraints cannot be met, the default behaviour for the downloader is to select first other available value. """ - for variable in self.raw_vars: + for variable in self.vars: + institution_id = "PNNL-JGCRI" if variable.endswith("openburning"): institution_id = "IAMC" - else: - institution_id = "PNNL-JGCRI" self.logger.info(f"Downloading data for variable: {variable}") self.download_raw_input_single_var(variable=variable, institution_id=institution_id) @@ -489,37 +513,42 @@ def download_from_config_file(config: str, logger: logging.Logger = LOGGER): if isinstance(config, str): config = pathlib.Path(config) config = get_yaml_config(config) - try: - project = config["project"] - except KeyError as e: - logger.warning( - f"No project specified. Assuming CMIP6 data should be downloaded. Caught the following exception: {e}" - ) - project = "CMIP6" - try: - models = config["models"] - except KeyError as e: - logger.warning(f"Caught the following exception but continuing : {e}") - logger.info("No climate models specified. Assuming only input4mips data should be downloaded.") - models = [None] - downloader_kwargs = config["downloader_kwargs"] - logger.info(f"Downloader kwargs : {downloader_kwargs}") - - # TODO @Francis I think we need to implement an abstract Downloader. - # Each project should get its own Downloader: CMIPXDownloader, input4mipsDownloader, etc. - # These classes only need to implement the different downloading functions needed for their specific datasets. 
- # Here, I am just doing the naive way with the stuff we have right now: - if project == "input4mips": - downloader = Downloader(project=project, model=models, **downloader_kwargs, logger=logger) - downloader.download_raw_input() - elif (project == "CMIP6") or (project == "CMIP6Plus"): - for m in models: - downloader = Downloader(project=project, model=m, **downloader_kwargs, logger=logger) - downloader.download_from_model() - else: - logger.info( - f"Project {project} is not supported. Consider implementing your own downloader childclass for this." - ) + + # get the supported esgf projects (cmip6, cmip6plus, input4mips) + implemented_projects = ESGF_PROJECTS.keys() + + # flag to check if at least a single project was found + project_found = False + + # iterate over all listed projects and download the requested data + for project_name, project_kwargs in config.items(): + downloader_kwargs = project_kwargs["downloader_kwargs"] + logger.info(f"Start downloading requested data for project {project_name}:") + logger.info(f" Downloader kwargs : {downloader_kwargs}") + + # project not found issues a warning + if project_name not in implemented_projects: + logger.warning( + f"The listed project {project_name} is not recognized. List of recognized projects: {implemented_projects}. Consider extending the downloader for the esgf project you would like to download. Continues attempting downloading data of other listed projects." + ) + continue + + # projects that have to be downloaded model wise, e.g. cmip6 + if project_name in ESGF_RAW_INPUT_LIST: + downloader = Downloader(project=project_name, model=None, **downloader_kwargs, logger=logger) + downloader.download_raw_input() + project_found = True + + # projects that have to be downloaded model-independent, e.g. 
input4mips + if project_name in ESGF_MODEL_OUTPUT_LIST: + for m in project_kwargs["models"]: + downloader = Downloader(project=project_name, model=m, **downloader_kwargs, logger=logger) + downloader.download_from_model() + project_found = True + + logger.info(f"Completed downloading data for project {project_name};") + + if not project_found: raise ValueError( - f"Project {project} is not supported. Currently supported projects are: CMIP6, CMIP6Plus, input4mips." + f"Failed to download the requested project data because none was recognized. Recognized projects are: {implemented_projects}." ) From ad5e0b0fa4a1f3b41a3743f77a0ded385ad0ce6f Mon Sep 17 00:00:00 2001 From: f-PLT Date: Fri, 10 Jan 2025 16:09:10 -0500 Subject: [PATCH 11/38] Add base structure for abstract downloader and implementations --- climateset/download/abstract_downloader.py | 7 + climateset/download/cmip6_downloader.py | 134 ++++++++++++++++++ climateset/download/input4mips_downloader.py | 141 +++++++++++++++++++ 3 files changed, 282 insertions(+) create mode 100644 climateset/download/abstract_downloader.py create mode 100644 climateset/download/cmip6_downloader.py create mode 100644 climateset/download/input4mips_downloader.py diff --git a/climateset/download/abstract_downloader.py b/climateset/download/abstract_downloader.py new file mode 100644 index 0000000..ebf69cd --- /dev/null +++ b/climateset/download/abstract_downloader.py @@ -0,0 +1,7 @@ +from abc import ABC, abstractmethod + + +class AbstractDownloader(ABC): + @abstractmethod + def download(self): + pass diff --git a/climateset/download/cmip6_downloader.py b/climateset/download/cmip6_downloader.py new file mode 100644 index 0000000..312f857 --- /dev/null +++ b/climateset/download/cmip6_downloader.py @@ -0,0 +1,134 @@ +from abstract_downloader import AbstractDownloader +from pyesgf.search import SearchConnection + +from climateset.download.utils import ( + _handle_base_search_constraints, + download_model_variable, + get_upload_version, 
+) +from climateset.utils import create_logger + +LOGGER = create_logger(__name__) + + +class CMIP6Downloader(AbstractDownloader): + def __init__(self): + self.logger = LOGGER + + def download(self): + """ + Function handling the download of all variables that are associated with a model's output. + + Searches for all files associated with the respected variables and experiment that the downloader + was initialized with. + + A search connection is established and the search is iteratively constraint to meet all specifications. + Data is downloaded and stored in a separate file for each year. The default format is netCDF4. + + Resulting hierarchy: + + `CMIPx/model_id/ensemble_member/experiment/variable/nominal_resolution/frequency/year.nc` + + If the constraints cannot be met, per default behaviour for the downloader to select first other + available value + """ + + for variable in self.model_vars: + self.logger.info(f"Downloading data for variable: {variable}") + for experiment in self.experiments: + if experiment in self.SUPPORTED_EXPERIMENTS: + self.logger.info(f"Downloading data for experiment: {experiment}") + self.download_from_model_single_var(project=self.project, variable=variable, experiment=experiment) + else: + self.logger.info( + f"Chosen experiment {experiment} not supported. All supported experiments: " + f"{self.SUPPORTED_EXPERIMENTS}. Skipping." + ) + + def download_from_model_single_var( # noqa: C901 + self, + variable: str, + experiment: str, + project: str = "CMIP6", + default_frequency: str = "mon", + preferred_version: str = "latest", + default_grid_label: str = "gn", + ): + """ + Function handling the download of a single variable-experiment pair that is associated with a model's output + (CMIP data). + + Args: + variable: variable ID + experiment: experiment ID + project: umbrella project id e.g. 
CMIPx + default_frequency: default frequency to download + preferred_version: data upload version, if 'latest', the newest version will get selected always + default_grid_label: default gridding method in which the data is provided + """ + conn = SearchConnection(url=self.model_node_link, distrib=False) + + facets = ( + "project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, " + "version, grid_label, experiment_id" + ) + + self.logger.info("Using download_from_model_single_var() function") + + ctx = conn.new_context( + project=project, + experiment_id=experiment, + source_id=self.model, + variable=variable, + facets=facets, + ) + + ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + + variants = list(ctx.facet_counts["variant_label"]) + + if len(variants) < 1: + self.logger.info( + "No items were found for this request. Please check on the esgf server if the combination of your model/scenarios/variables exists." + ) + raise ValueError( + "Downloader did not find any items on esgf for your request with: Project {project}, Experiment {experiment}, Model {self.model}, Variable {variable}." + ) + + self.logger.info(f"Available variants : {variants}\n") + self.logger.info(f"Length : {len(variants)}") + + # TODO refactor logic of if/else + if not self.ensemble_members: + if self.max_ensemble_members > len(variants): + self.logger.info("Less ensemble members available than maximum number desired. Including all variants.") + ensemble_member_final_list = variants + else: + self.logger.info( + f"{len(variants)} ensemble members available than desired (max {self.max_ensemble_members}. " + f"Choosing only the first {self.max_ensemble_members}.)." 
+ ) + ensemble_member_final_list = variants[: self.max_ensemble_members] + else: + self.logger.info(f"Desired list of ensemble members given: {self.ensemble_members}") + ensemble_member_final_list = list(set(variants) & set(self.ensemble_members)) + if len(ensemble_member_final_list) == 0: + self.logger.info("WARNING: no overlap between available and desired ensemble members!") + self.logger.info("Skipping.") + return None + + for ensemble_member in ensemble_member_final_list: + self.logger.info(f"Ensembles member: {ensemble_member}") + ctx_ensemble = ctx.constrain(variant_label=ensemble_member) + + version = get_upload_version(context=ctx, preferred_version=preferred_version) + if version: + ctx_ensemble = ctx_ensemble.constrain(version=version) + + results = ctx_ensemble.search() + + self.logger.info(f"Result len {len(results)}") + + download_model_variable( + model_id=self.model, search_results=results, variable=variable, base_path=self.data_dir + ) diff --git a/climateset/download/input4mips_downloader.py b/climateset/download/input4mips_downloader.py new file mode 100644 index 0000000..61569e8 --- /dev/null +++ b/climateset/download/input4mips_downloader.py @@ -0,0 +1,141 @@ +from abstract_downloader import AbstractDownloader +from pyesgf.search import SearchConnection + +from climateset.download.utils import ( + _handle_base_search_constraints, + download_metadata_variable, + download_raw_input_variable, + get_upload_version, +) +from climateset.utils import create_logger + +LOGGER = create_logger(__name__) + + +class Input4MipsDownloader(AbstractDownloader): + def __init__(self): + self.raw_vars = "" + self.logger = LOGGER + + def download(self): + for variable in self.raw_vars: + if variable.endswith("openburning"): + institution_id = "IAMC" + else: + institution_id = "PNNL-JGCRI" + self.logger.info(f"Downloading data for variable: {variable}") + self.download_raw_input_single_var(variable=variable, institution_id=institution_id) + + if 
self.download_biomass_burning & ("historical" in self.experiments): + for variable in self.biomass_vars: + self.logger.info(f"Downloading biomassburing data for variable: {variable}") + self.download_raw_input_single_var(variable=variable, institution_id="VUA") + + if self.download_metafiles: + for variable in self.meta_vars_percentage: + # percentage are historic and have no scenarios + self.logger.info(f"Downloading meta percentage data for variable: {variable}") + self.download_meta_historic_biomassburning_single_var(variable=variable, institution_id="VUA") + for variable in self.meta_vars_share: + self.logger.info(f"Downloading meta openburning share data for variable: {variable}") + self.download_raw_input_single_var(variable=variable, institution_id="IAMC") + + def download_raw_input_single_var( # noqa: C901 + self, + variable: str, + project: str = "input4mips", + institution_id: str = "PNNL-JGCRI", + default_frequency: str = "mon", + preferred_version: str = "latest", + default_grid_label: str = "gn", + ): + """ + Function handling the download of all input4mips data associated with a single variable. 
+ + Args: + variable: variable ID + project: umbrella project, here "input4mips" + institution_id: id of the institution that provides the data + default_frequency: default frequency to download + preferred_version: data upload version, if 'latest', the newest version will get selected always + default_grid_label: default gridding method in which the data is provided + """ + self.logger.info("Using download_raw_input_single_var() function") + + facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" + conn = SearchConnection(url=self.model_node_link, distrib=False) + + ctx = conn.new_context( + project=project, + variable=variable, + institution_id=institution_id, + facets=facets, + ) + + ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + + mips_targets = list(ctx.facet_counts["target_mip"]) + self.logger.info(f"Available target mips: {mips_targets}") + + for target in mips_targets: + ctx_target = ctx.constrain(target_mip=target) + version = get_upload_version(context=ctx_target, preferred_version=preferred_version) + if version: + ctx_target = ctx_target.constrain(version=version) + + results = ctx_target.search() + self.logger.info(f"Result len {len(results)}") + if len(results) > 0: + download_raw_input_variable( + institution_id=institution_id, search_results=results, variable=variable, base_path=self.data_dir + ) + + def download_meta_historic_biomassburning_single_var( + self, + variable: str, + institution_id: str, + project: str = "input4mips", + default_grid_label: str = "gn", + default_frequency: str = "mon", + preferred_version: str = "latest", + ): + """ + Function handling the download of all metadata associated with a single input4mips variable. 
+ + Args: + variable: variable ID + project: umbrella project + institution_id: id of the institution that provides the data + default_grid_label: default gridding method in which the data is provided + default_frequency: default frequency to download + preferred_version: data upload version, if 'latest', the newest version will get selected always + """ + variable_id = variable.replace("_", "-") + variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}" + self.logger.info(variable, variable_id, institution_id) + conn = SearchConnection(url=self.model_node_link, distrib=False) + facets = "nominal_resolution,version" + ctx = conn.new_context( + project=project, + variable=variable_search, + variable_id=variable_id, + institution_id=institution_id, + target_mip="CMIP", + facets=facets, + ) + + ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + + version = get_upload_version(context=ctx, preferred_version=preferred_version) + if version: + ctx = ctx.constrain(version=version) + + results = ctx.search() + self.logger.info(f"Result len {len(results)}") + + result_list = [r.file_context().search() for r in results] + self.logger.info(f"List of results :\n{result_list}") + + download_metadata_variable( + institution_id=institution_id, search_results=results, variable=variable, base_path=self.data_dir + ) From aa0e4512916281f30b6734b28cb601485e04f87a Mon Sep 17 00:00:00 2001 From: f-PLT Date: Tue, 28 Jan 2025 17:10:15 -0500 Subject: [PATCH 12/38] Refactor ESGF constants and project constants --- climateset/download/constants/esgf.py | 18 ++++++++++++------ climateset/download/downloader.py | 8 ++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/climateset/download/constants/esgf.py b/climateset/download/constants/esgf.py index 879d946..ba296c6 100644 --- a/climateset/download/constants/esgf.py +++ b/climateset/download/constants/esgf.py @@ -2,16 +2,22 @@ from .cmip6plus import Cmip6plusConstants 
from .input4mips import Input4mipsConstants +CMIP6 = "CMIP6" +CMIP6PLUS = "CMIP6Plus" +INPUT4MIPS = "input4MIPs" + +ESGF_PROJECTS = frozenset([CMIP6, CMIP6PLUS, INPUT4MIPS]) + # constant classes for esgf projects implemented here # add your own esgf project for downloading to download/constants/ and add the constant class to the dict and lists here -ESGF_PROJECTS = { - "CMIP6": Cmip6Constants, - "CMIP6Plus": Cmip6plusConstants, - "input4MIPs": Input4mipsConstants, +ESGF_PROJECTS_CONSTANTS = { + CMIP6: Cmip6Constants, + CMIP6PLUS: Cmip6plusConstants, + INPUT4MIPS: Input4mipsConstants, } # datasets that provide inputs to climate models -ESGF_RAW_INPUT_LIST = ["input4MIPs"] +ESGF_RAW_INPUT_LIST = [INPUT4MIPS] # datasets that provide outputs from climate models -ESGF_MODEL_OUTPUT_LIST = ["CMIP6", "CMIP6Plus"] +ESGF_MODEL_OUTPUT_LIST = [CMIP6, CMIP6PLUS] diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index a33ab3c..bdca350 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -7,7 +7,7 @@ from climateset import RAW_DATA from climateset.download.constants.esgf import ( ESGF_MODEL_OUTPUT_LIST, - ESGF_PROJECTS, + ESGF_PROJECTS_CONSTANTS, ESGF_RAW_INPUT_LIST, ) from climateset.download.utils import ( @@ -113,12 +113,12 @@ def __init__( # noqa: C901 # @Francis (JK) I am still not happy about this function. Please let me know if I can improve this bit def _init_project_constants(self): """Assign/init attributed depending on the project.""" - if self.project not in ESGF_PROJECTS: + if self.project not in ESGF_PROJECTS_CONSTANTS: self.logger.info(f"Project {self.project} has not been implemented in the Downloader yet.") raise ValueError( f"Project {self.project} is not recognized. Consider adding a constant class in download/constants and the esgf.py file." 
) - proj_constants = ESGF_PROJECTS[self.project] + proj_constants = ESGF_PROJECTS_CONSTANTS[self.project] # init shared constants self.node_link = proj_constants.NODE_LINK @@ -515,7 +515,7 @@ def download_from_config_file(config: str, logger: logging.Logger = LOGGER): config = get_yaml_config(config) # get the supported esgf projects (cmip6, cmip6plus, input4mips) - implemented_projects = ESGF_PROJECTS.keys() + implemented_projects = ESGF_PROJECTS_CONSTANTS.keys() # flag to check if at least a single project was found project_found = False From 6a76fa9770380cc21c00ea047edb8b30071cf525 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Tue, 28 Jan 2025 17:12:17 -0500 Subject: [PATCH 13/38] Add first base structure of Config classes --- climateset/download/abstract_downloader.py | 5 + .../download/abstract_downloader_config.py | 186 ++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 climateset/download/abstract_downloader_config.py diff --git a/climateset/download/abstract_downloader.py b/climateset/download/abstract_downloader.py index ebf69cd..c4bf923 100644 --- a/climateset/download/abstract_downloader.py +++ b/climateset/download/abstract_downloader.py @@ -1,7 +1,12 @@ from abc import ABC, abstractmethod +from climateset.download.abstract_downloader_config import AbstractDownloaderConfig + class AbstractDownloader(ABC): + def __init__(self, config: AbstractDownloaderConfig): + self.config = config + @abstractmethod def download(self): pass diff --git a/climateset/download/abstract_downloader_config.py b/climateset/download/abstract_downloader_config.py new file mode 100644 index 0000000..d2b5ed3 --- /dev/null +++ b/climateset/download/abstract_downloader_config.py @@ -0,0 +1,186 @@ +import logging +from abc import ABC +from pathlib import Path +from typing import Union + +import yaml + +from climateset import CONFIGS, RAW_DATA +from climateset.download.constants.esgf import ESGF_PROJECTS, ESGF_PROJECTS_CONSTANTS +from climateset.utils import 
create_logger + +LOGGER = create_logger(__name__) + + +class AbstractDownloaderConfig(ABC): + def __init__( + self, + project: str, + data_dir: Union[str, Path] = RAW_DATA, + experiments: list[str] = None, + variables: list[str] = None, + overwrite: bool = False, + logger: logging.Logger = LOGGER, + ): + self.logger = logger + + self.project = "" + uppercase_project = project.upper() + for p in ESGF_PROJECTS: + if p.upper() == uppercase_project: + self.project = p + + if self.project not in ESGF_PROJECTS: + self.logger.error(f"Project {self.project} has not been implemented in the Downloader yet.") + raise ValueError( + f"Project {self.project} is not recognized. Consider adding a constant class in download/constants and " + f"the esgf.py file." + ) + + if isinstance(data_dir, str): + data_dir = Path(data_dir) + self.data_dir = data_dir + + self.experiments = experiments + self.variables = variables + self.overwrite = overwrite + + # init shared constants + self.proj_constants = ESGF_PROJECTS_CONSTANTS[self.project] + self.node_link = self.proj_constants.NODE_LINK + self.avail_variables = self.proj_constants.VAR_SOURCE_LOOKUP + self.avail_experiments = self.proj_constants.SUPPORTED_EXPERIMENTS + + def generate_config_file(self, config_file_name: str, config_path: Union[str, Path] = CONFIGS) -> None: + if isinstance(config_path, str): + config_path = Path(config_path) + if not config_file_name.endswith(".yaml"): + config_file_name = f"{config_file_name}.yaml" + + config_full_path = config_path / config_file_name + data = {self.project: {}} + for key, value in self.__dict__.items(): + if key not in ["project", "logger"] and not callable(value): + data[self.project][key] = value + with open(config_full_path, "w") as config_file: + yaml.dump(data, config_file, indent=2) + + +class Input4mipsDownloaderConfig(AbstractDownloaderConfig): + def __init__( + self, + project: str, + data_dir: str = RAW_DATA, + experiments: list[str] = None, + variables: list[str] = None, + 
download_biomassburning: bool = True, # get biomassburning data for input4mips + download_metafiles: bool = True, # get input4mips meta files + use_plain_emission_vars: bool = True, + overwrite: bool = False, + logger: logging.Logger = LOGGER, + ): + super().__init__(project, data_dir, experiments, variables, overwrite, logger) + + self.download_metafiles: bool = download_metafiles # TODO infer automatically from vars + self.download_biomass_burning: bool = download_biomassburning # TODO infer automatically from vars + self.use_plain_emission_vars: bool = use_plain_emission_vars + + self.emissions_endings = self.proj_constants.EMISSIONS_ENDINGS + self.meta_endings_prc = self.proj_constants.META_ENDINGS_PRC + self.meta_endings_share = self.proj_constants.META_ENDINGS_SHAR + self.mip_area = self.proj_constants.MIP_ERA + self.target_mip = self.proj_constants.TARGET_MIP + + # Attributes that are going to be retrieved / set within this class for + ## (all) + self.vars: list[str] = variables + ## (climate model inputs) + self.biomass_vars: list[str] = [] + self.meta_vars_percentage: list[str] = [] + self.meta_vars_share: list[str] = [] + + self._handle_emission_variables( + variables=variables, + ) + + def _handle_emission_variables(self, variables: list[str]): + self.vars = [] + self._generate_raw_emission_vars(variables=variables) + self._generate_plain_emission_vars() + self.logger.info(f"Emission variables to download: {self.vars}") + if self.download_biomass_burning: + self.logger.info(f"Biomass burning vars to download: {self.biomass_vars}") + if self.download_metafiles: + self.logger.info( + f"Meta emission vars to download:\n\t{self.meta_vars_percentage}\n\t{self.meta_vars_share}" + ) + + def _generate_raw_emission_vars(self, variables: list[str]): + if variables is None: + # variables = ["tas", "pr", "SO2_em_anthro", "BC_em_anthro"] + raise ValueError("No variables have been given to the downloader. 
Variables must be given for downloader.") + variables = [v.replace(" ", "_").replace("-", "_") for v in variables] + self.logger.info(f"Cleaned variables : {variables}") + for v in variables: + self.vars.append(v) + + def _generate_plain_emission_vars(self): + if self.use_plain_emission_vars: + # plain vars are biomass vars + self.biomass_vars = self.vars + self.meta_vars_percentage = [ + biomass_var + ending + for biomass_var in self.biomass_vars + if biomass_var != "CO2" + for ending in self.meta_endings_prc + ] + self.meta_vars_share = [ + biomass_var + ending + for biomass_var in self.biomass_vars + if biomass_var != "CO2" + for ending in self.meta_endings_share + ] + + self.vars = [ + variable + emission_ending for variable in self.vars for emission_ending in self.emissions_endings + ] + # be careful with CO2 + if "CO2_em_openburning" in self.vars: + self.vars.remove("CO2_em_openburning") + else: + # get plain input4mips vars = biomass vars for historical + self.biomass_vars = list({v.split("_")[0] for v in self.vars}) + # remove biomass vars from normal vars list + for b in self.biomass_vars: + try: + self.vars.remove(b) + except Exception as error: + self.logger.warning(f"Caught the following exception but continuing : {error}") + + self.meta_vars_percentage = [ + biomass_var + ending + for biomass_var in self.biomass_vars + if biomass_var != "CO2" + for ending in self.meta_endings_prc + ] + self.meta_vars_share = [ + biomass_var + ending + for biomass_var in self.biomass_vars + if biomass_var != "CO2" + for ending in self.meta_endings_share + ] + + +class CMIP6DownloaderConfig(AbstractDownloaderConfig): + def __init__( + self, + project: str, + data_dir: str = RAW_DATA, + experiments: list[str] = None, + variables: list[str] = None, + overwrite: bool = False, + logger: logging.Logger = LOGGER, + ): + super().__init__(project, data_dir, experiments, variables, overwrite, logger) + + self.avail_models = self.proj_constants.MODEL_SOURCES From 
23b0bea3f278b68349314be93d514fcfcc1fed0e Mon Sep 17 00:00:00 2001 From: f-PLT Date: Tue, 28 Jan 2025 17:44:08 -0500 Subject: [PATCH 14/38] Integrate config class for Input4mips --- climateset/download/abstract_downloader.py | 5 ---- climateset/download/input4mips_downloader.py | 25 ++++++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/climateset/download/abstract_downloader.py b/climateset/download/abstract_downloader.py index c4bf923..ebf69cd 100644 --- a/climateset/download/abstract_downloader.py +++ b/climateset/download/abstract_downloader.py @@ -1,12 +1,7 @@ from abc import ABC, abstractmethod -from climateset.download.abstract_downloader_config import AbstractDownloaderConfig - class AbstractDownloader(ABC): - def __init__(self, config: AbstractDownloaderConfig): - self.config = config - @abstractmethod def download(self): pass diff --git a/climateset/download/input4mips_downloader.py b/climateset/download/input4mips_downloader.py index 61569e8..e76a487 100644 --- a/climateset/download/input4mips_downloader.py +++ b/climateset/download/input4mips_downloader.py @@ -1,6 +1,7 @@ from abstract_downloader import AbstractDownloader from pyesgf.search import SearchConnection +from climateset.download.abstract_downloader_config import Input4mipsDownloaderConfig from climateset.download.utils import ( _handle_base_search_constraints, download_metadata_variable, @@ -13,7 +14,8 @@ class Input4MipsDownloader(AbstractDownloader): - def __init__(self): + def __init__(self, config: Input4mipsDownloaderConfig): + self.config: Input4mipsDownloaderConfig = config self.raw_vars = "" self.logger = LOGGER @@ -26,17 +28,17 @@ def download(self): self.logger.info(f"Downloading data for variable: {variable}") self.download_raw_input_single_var(variable=variable, institution_id=institution_id) - if self.download_biomass_burning & ("historical" in self.experiments): - for variable in self.biomass_vars: + if self.config.download_biomass_burning & 
("historical" in self.config.experiments): + for variable in self.config.biomass_vars: self.logger.info(f"Downloading biomassburing data for variable: {variable}") self.download_raw_input_single_var(variable=variable, institution_id="VUA") - if self.download_metafiles: - for variable in self.meta_vars_percentage: + if self.config.download_metafiles: + for variable in self.config.meta_vars_percentage: # percentage are historic and have no scenarios self.logger.info(f"Downloading meta percentage data for variable: {variable}") self.download_meta_historic_biomassburning_single_var(variable=variable, institution_id="VUA") - for variable in self.meta_vars_share: + for variable in self.config.meta_vars_share: self.logger.info(f"Downloading meta openburning share data for variable: {variable}") self.download_raw_input_single_var(variable=variable, institution_id="IAMC") @@ -63,7 +65,7 @@ def download_raw_input_single_var( # noqa: C901 self.logger.info("Using download_raw_input_single_var() function") facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" - conn = SearchConnection(url=self.model_node_link, distrib=False) + conn = SearchConnection(url=self.config.node_link, distrib=False) ctx = conn.new_context( project=project, @@ -87,7 +89,10 @@ def download_raw_input_single_var( # noqa: C901 self.logger.info(f"Result len {len(results)}") if len(results) > 0: download_raw_input_variable( - institution_id=institution_id, search_results=results, variable=variable, base_path=self.data_dir + institution_id=institution_id, + search_results=results, + variable=variable, + base_path=self.config.data_dir, ) def download_meta_historic_biomassburning_single_var( @@ -113,7 +118,7 @@ def download_meta_historic_biomassburning_single_var( variable_id = variable.replace("_", "-") variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}" self.logger.info(variable, variable_id, institution_id) - conn = 
SearchConnection(url=self.model_node_link, distrib=False) + conn = SearchConnection(url=self.config.node_link, distrib=False) facets = "nominal_resolution,version" ctx = conn.new_context( project=project, @@ -137,5 +142,5 @@ def download_meta_historic_biomassburning_single_var( self.logger.info(f"List of results :\n{result_list}") download_metadata_variable( - institution_id=institution_id, search_results=results, variable=variable, base_path=self.data_dir + institution_id=institution_id, search_results=results, variable=variable, base_path=self.config.data_dir ) From 770c00326145ffaa3bcf5488857ec7ffd9d1cc52 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Tue, 25 Feb 2025 19:30:12 -0500 Subject: [PATCH 15/38] Implement config classes --- climateset/download/cmip6_downloader.py | 62 ++++--- climateset/download/downloader.py | 100 +++++------ ...nloader_config.py => downloader_config.py} | 155 ++++++++++++------ climateset/download/input4mips_downloader.py | 47 ++++-- climateset/download/utils.py | 45 ++--- configs/core_dataset.yaml | 12 +- 6 files changed, 241 insertions(+), 180 deletions(-) rename climateset/download/{abstract_downloader_config.py => downloader_config.py} (55%) diff --git a/climateset/download/cmip6_downloader.py b/climateset/download/cmip6_downloader.py index 312f857..98c9d1e 100644 --- a/climateset/download/cmip6_downloader.py +++ b/climateset/download/cmip6_downloader.py @@ -1,10 +1,15 @@ -from abstract_downloader import AbstractDownloader from pyesgf.search import SearchConnection +from climateset.download.abstract_downloader import AbstractDownloader +from climateset.download.constants.esgf import CMIP6 +from climateset.download.downloader_config import ( + CMIP6DownloaderConfig, + create_cmip6_downloader_config_from_file, +) from climateset.download.utils import ( - _handle_base_search_constraints, download_model_variable, get_upload_version, + handle_base_search_constraints, ) from climateset.utils import create_logger @@ -12,8 +17,9 @@ class 
CMIP6Downloader(AbstractDownloader): - def __init__(self): + def __init__(self, config: CMIP6DownloaderConfig): self.logger = LOGGER + self.config = config def download(self): """ @@ -33,23 +39,25 @@ def download(self): available value """ - for variable in self.model_vars: + for variable in self.config.variables: self.logger.info(f"Downloading data for variable: {variable}") - for experiment in self.experiments: - if experiment in self.SUPPORTED_EXPERIMENTS: - self.logger.info(f"Downloading data for experiment: {experiment}") - self.download_from_model_single_var(project=self.project, variable=variable, experiment=experiment) - else: + for experiment in self.config.experiments: + if experiment not in self.config.avail_experiments: self.logger.info( f"Chosen experiment {experiment} not supported. All supported experiments: " - f"{self.SUPPORTED_EXPERIMENTS}. Skipping." + f"{self.config.avail_experiments}. Skipping." ) + continue + self.logger.info(f"Downloading data for experiment: {experiment}") + self.download_from_model_single_var( + project=self.config.project, variable=variable, experiment=experiment + ) def download_from_model_single_var( # noqa: C901 self, variable: str, experiment: str, - project: str = "CMIP6", + project: str = CMIP6, default_frequency: str = "mon", preferred_version: str = "latest", default_grid_label: str = "gn", @@ -66,7 +74,7 @@ def download_from_model_single_var( # noqa: C901 preferred_version: data upload version, if 'latest', the newest version will get selected always default_grid_label: default gridding method in which the data is provided """ - conn = SearchConnection(url=self.model_node_link, distrib=False) + conn = SearchConnection(url=self.config.node_link, distrib=False) facets = ( "project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, " @@ -78,12 +86,12 @@ def download_from_model_single_var( # noqa: C901 ctx = conn.new_context( project=project, experiment_id=experiment, - 
source_id=self.model, + source_id=self.config.model, variable=variable, facets=facets, ) - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) variants = list(ctx.facet_counts["variant_label"]) @@ -99,19 +107,19 @@ def download_from_model_single_var( # noqa: C901 self.logger.info(f"Length : {len(variants)}") # TODO refactor logic of if/else - if not self.ensemble_members: - if self.max_ensemble_members > len(variants): + if not self.config.ensemble_members: + if self.config.max_ensemble_members > len(variants): self.logger.info("Less ensemble members available than maximum number desired. Including all variants.") ensemble_member_final_list = variants else: self.logger.info( - f"{len(variants)} ensemble members available than desired (max {self.max_ensemble_members}. " - f"Choosing only the first {self.max_ensemble_members}.)." + f"{len(variants)} ensemble members available than desired (max {self.config.max_ensemble_members}. " + f"Choosing only the first {self.config.max_ensemble_members}.)." 
) - ensemble_member_final_list = variants[: self.max_ensemble_members] + ensemble_member_final_list = variants[: self.config.max_ensemble_members] else: - self.logger.info(f"Desired list of ensemble members given: {self.ensemble_members}") - ensemble_member_final_list = list(set(variants) & set(self.ensemble_members)) + self.logger.info(f"Desired list of ensemble members given: {self.config.ensemble_members}") + ensemble_member_final_list = list(set(variants) & set(self.config.ensemble_members)) if len(ensemble_member_final_list) == 0: self.logger.info("WARNING: no overlap between available and desired ensemble members!") self.logger.info("Skipping.") @@ -130,5 +138,15 @@ def download_from_model_single_var( # noqa: C901 self.logger.info(f"Result len {len(results)}") download_model_variable( - model_id=self.model, search_results=results, variable=variable, base_path=self.data_dir + project=CMIP6, + model_id=self.config.model, + search_results=results, + variable=variable, + base_path=self.config.data_dir, ) + + +def cmip6_download_from_config(config): + config_object = create_cmip6_downloader_config_from_file(config) + downloader = CMIP6Downloader(config=config_object) + downloader.download() diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index bdca350..c2fd516 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -5,17 +5,26 @@ from pyesgf.search import SearchConnection from climateset import RAW_DATA +from climateset.download.cmip6_downloader import CMIP6Downloader from climateset.download.constants.esgf import ( + CMIP6, ESGF_MODEL_OUTPUT_LIST, ESGF_PROJECTS_CONSTANTS, - ESGF_RAW_INPUT_LIST, + INPUT4MIPS, ) +from climateset.download.downloader_config import ( + AVAILABLE_CONFIGS, + create_cmip6_downloader_config_from_file, + create_input4mips_downloader_config_from_file, + match_project_key, +) +from climateset.download.input4mips_downloader import Input4MipsDownloader from 
climateset.download.utils import ( - _handle_base_search_constraints, download_metadata_variable, download_model_variable, download_raw_input_variable, get_upload_version, + handle_base_search_constraints, ) from climateset.utils import create_logger, get_yaml_config @@ -269,7 +278,6 @@ def download_from_model_single_var( # noqa: C901 preferred_version: data upload version, if 'latest', the newest version will get selected always default_grid_label: default gridding method in which the data is provided """ - conn = SearchConnection(url=self.node_link, distrib=False) facets = ( "project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, " @@ -278,6 +286,9 @@ def download_from_model_single_var( # noqa: C901 self.logger.info("Using download_from_model_single_var() function") + # Search context is sensitive to order and sequence, which is why + # it's done in different steps instead of putting everything in `new_context` + conn = SearchConnection(url=self.node_link, distrib=False) ctx = conn.new_context( project=project, experiment_id=experiment, @@ -286,7 +297,7 @@ def download_from_model_single_var( # noqa: C901 facets=facets, ) - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) variants = list(ctx.facet_counts["variant_label"]) @@ -359,16 +370,17 @@ def download_raw_input_single_var( # noqa: C901 self.logger.info("Using download_raw_input_single_var() function") facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" - conn = SearchConnection(url=self.node_link, distrib=False) + # Search context is sensitive to order and sequence, which is why + # it's done in different steps instead of putting everything in `new_context` + conn = SearchConnection(url=self.node_link, distrib=False) ctx = conn.new_context( project=project, variable=variable, institution_id=institution_id, facets=facets, 
) - - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) mips_targets = list(ctx.facet_counts["target_mip"]) self.logger.info(f"Available target mips: {mips_targets}") @@ -409,8 +421,11 @@ def download_meta_historic_biomassburning_single_var( variable_id = variable.replace("_", "-") variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}" self.logger.info(variable, variable_id, institution_id) - conn = SearchConnection(url=self.node_link, distrib=False) facets = "nominal_resolution,version" + + # Search context is sensitive to order and sequence, which is why + # it's done in different steps instead of putting everything in `new_context` + conn = SearchConnection(url=self.node_link, distrib=False) ctx = conn.new_context( project=project, variable=variable_search, @@ -419,8 +434,7 @@ def download_meta_historic_biomassburning_single_var( target_mip="CMIP", facets=facets, ) - - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) version = get_upload_version(context=ctx, preferred_version=preferred_version) if version: @@ -502,53 +516,29 @@ def download_raw_input(self): self.download_raw_input_single_var(variable=variable, institution_id="IAMC") -def download_from_config_file(config: str, logger: logging.Logger = LOGGER): +def download_from_config_file(config_file: Union[str, pathlib.Path], logger: logging.Logger = LOGGER): """ This function downloads variables automatically from input config file Args: - config: Can be a dictionary containing configurations or a path to a configuration yaml file + config_file: Path to a configuration yaml file logger: Logging instance """ - if not isinstance(config, dict): - if isinstance(config, str): - config = pathlib.Path(config) - config = get_yaml_config(config) - - # get the 
supported esgf projects (cmip6, cmip6plus, input4mips) - implemented_projects = ESGF_PROJECTS_CONSTANTS.keys() - - # flag to check if at least a single project was found - project_found = False - - # iterate over all listed projects and download the requested data - for project_name, project_kwargs in config.items(): - downloader_kwargs = project_kwargs["downloader_kwargs"] - logger.info(f"Start downloading requested data for project {project_name}:") - logger.info(f" Downloader kwargs : {downloader_kwargs}") - - # project not found issues a warning - if project_name not in implemented_projects: - logger.warning( - f"The listed project {project_name} is not recognized. List of recognized projects: {implemented_projects}. Consider extending the downloader for the esgf project you would like to download. Continues attempting downloading data of other listed projects." - ) - continue - - # projects that have to be downloaded model wise, e.g. cmip6 - if project_name in ESGF_RAW_INPUT_LIST: - downloader = Downloader(project=project_name, model=None, **downloader_kwargs, logger=logger) - downloader.download_raw_input() - project_found = True - - # projects that have to be downloaded model-independent, e.g. input4mips - if project_name in ESGF_MODEL_OUTPUT_LIST: - for m in project_kwargs["models"]: - downloader = Downloader(project=project_name, model=m, **downloader_kwargs, logger=logger) - downloader.download_from_model() - project_found = True - - logger.info(f"Completed downloading data for project {project_name};") - - if not project_found: - raise ValueError( - f"Failed to download the requested project data because none was recognized. Recognized projects are: {implemented_projects}." 
- ) + if isinstance(config_file, str): + config_file = pathlib.Path(config_file) + config_dict = get_yaml_config(config_file) + + downloader_factory = { + INPUT4MIPS: {"configs": create_input4mips_downloader_config_from_file, "downloader": Input4MipsDownloader}, + CMIP6: {"configs": create_cmip6_downloader_config_from_file, "downloader": CMIP6Downloader}, + } + + verified_config_keys = [] + for config_key in config_dict: + verified_key = match_project_key(input_key=config_key, key_list=AVAILABLE_CONFIGS) + if verified_key: + verified_config_keys.append(verified_key) + + for config_key in verified_config_keys: + configs = downloader_factory[config_key]["configs"](config_file=config_file) + downloader = downloader_factory[config_key]["downloader"](config=configs) + downloader.download() diff --git a/climateset/download/abstract_downloader_config.py b/climateset/download/downloader_config.py similarity index 55% rename from climateset/download/abstract_downloader_config.py rename to climateset/download/downloader_config.py index d2b5ed3..cac97c8 100644 --- a/climateset/download/abstract_downloader_config.py +++ b/climateset/download/downloader_config.py @@ -1,3 +1,5 @@ +import copy +import inspect import logging from abc import ABC from pathlib import Path @@ -6,11 +8,18 @@ import yaml from climateset import CONFIGS, RAW_DATA -from climateset.download.constants.esgf import ESGF_PROJECTS, ESGF_PROJECTS_CONSTANTS -from climateset.utils import create_logger +from climateset.download.constants.esgf import ( + CMIP6, + ESGF_PROJECTS, + ESGF_PROJECTS_CONSTANTS, + INPUT4MIPS, +) +from climateset.utils import create_logger, get_yaml_config LOGGER = create_logger(__name__) +AVAILABLE_CONFIGS = frozenset([CMIP6, INPUT4MIPS]) + class AbstractDownloaderConfig(ABC): def __init__( @@ -51,20 +60,41 @@ def __init__( self.avail_variables = self.proj_constants.VAR_SOURCE_LOOKUP self.avail_experiments = self.proj_constants.SUPPORTED_EXPERIMENTS - def generate_config_file(self, 
config_file_name: str, config_path: Union[str, Path] = CONFIGS) -> None: + @staticmethod + def _handle_yaml_config_path(config_file_name, config_path): if isinstance(config_path, str): config_path = Path(config_path) if not config_file_name.endswith(".yaml"): config_file_name = f"{config_file_name}.yaml" - config_full_path = config_path / config_file_name - data = {self.project: {}} + return config_full_path + + def generate_config_dict(self): + init_params = inspect.signature(self.__init__).parameters + init_args = set(init_params.keys()) - {"self"} + config_dict = {self.project: {}} for key, value in self.__dict__.items(): - if key not in ["project", "logger"] and not callable(value): - data[self.project][key] = value + if key in init_args and key not in ["project", "logger"] and not callable(value): + config_dict[self.project][key] = value + return config_dict + + def generate_config_file(self, config_file_name: str, config_path: Union[str, Path] = CONFIGS) -> None: + config_full_path = self._handle_yaml_config_path(config_file_name, config_path) + data = self.generate_config_dict() with open(config_full_path, "w") as config_file: yaml.dump(data, config_file, indent=2) + def add_to_config_file(self, config_file_name: str, config_path: Union[str, Path] = CONFIGS) -> None: + config_full_path = self._handle_yaml_config_path(config_file_name, config_path) + existing_config = {} + if config_full_path.exists(): + existing_config = get_yaml_config(config_full_path) + existing_config.update(existing_config) + new_config = self.generate_config_dict() + existing_config.update(new_config) + with open(config_full_path, "w") as config_file: + yaml.dump(existing_config, config_file, indent=2) + class Input4mipsDownloaderConfig(AbstractDownloaderConfig): def __init__( @@ -84,7 +114,6 @@ def __init__( self.download_metafiles: bool = download_metafiles # TODO infer automatically from vars self.download_biomass_burning: bool = download_biomassburning # TODO infer automatically 
from vars self.use_plain_emission_vars: bool = use_plain_emission_vars - self.emissions_endings = self.proj_constants.EMISSIONS_ENDINGS self.meta_endings_prc = self.proj_constants.META_ENDINGS_PRC self.meta_endings_share = self.proj_constants.META_ENDINGS_SHAR @@ -93,21 +122,17 @@ def __init__( # Attributes that are going to be retrieved / set within this class for ## (all) - self.vars: list[str] = variables ## (climate model inputs) self.biomass_vars: list[str] = [] self.meta_vars_percentage: list[str] = [] self.meta_vars_share: list[str] = [] - self._handle_emission_variables( - variables=variables, - ) + self._handle_emission_variables() - def _handle_emission_variables(self, variables: list[str]): - self.vars = [] - self._generate_raw_emission_vars(variables=variables) + def _handle_emission_variables(self): + self._generate_raw_emission_vars() self._generate_plain_emission_vars() - self.logger.info(f"Emission variables to download: {self.vars}") + self.logger.info(f"Emission variables to download: {self.variables}") if self.download_biomass_burning: self.logger.info(f"Biomass burning vars to download: {self.biomass_vars}") if self.download_metafiles: @@ -115,60 +140,45 @@ def _handle_emission_variables(self, variables: list[str]): f"Meta emission vars to download:\n\t{self.meta_vars_percentage}\n\t{self.meta_vars_share}" ) - def _generate_raw_emission_vars(self, variables: list[str]): + def _generate_raw_emission_vars(self): + variables = copy.deepcopy(self.variables) if variables is None: - # variables = ["tas", "pr", "SO2_em_anthro", "BC_em_anthro"] raise ValueError("No variables have been given to the downloader. 
Variables must be given for downloader.") - variables = [v.replace(" ", "_").replace("-", "_") for v in variables] - self.logger.info(f"Cleaned variables : {variables}") - for v in variables: - self.vars.append(v) + self.variables = [v.replace(" ", "_").replace("-", "_") for v in variables] + self.logger.info(f"Cleaned variables : {self.variables}") def _generate_plain_emission_vars(self): if self.use_plain_emission_vars: # plain vars are biomass vars - self.biomass_vars = self.vars - self.meta_vars_percentage = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in self.meta_endings_prc - ] - self.meta_vars_share = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in self.meta_endings_share - ] - - self.vars = [ - variable + emission_ending for variable in self.vars for emission_ending in self.emissions_endings + self.biomass_vars = self.variables + self.variables = [ + variable + emission_ending for variable in self.variables for emission_ending in self.emissions_endings ] # be careful with CO2 - if "CO2_em_openburning" in self.vars: - self.vars.remove("CO2_em_openburning") + if "CO2_em_openburning" in self.variables: + self.variables.remove("CO2_em_openburning") else: # get plain input4mips vars = biomass vars for historical - self.biomass_vars = list({v.split("_")[0] for v in self.vars}) + self.biomass_vars = list({v.split("_")[0] for v in self.variables}) # remove biomass vars from normal vars list for b in self.biomass_vars: try: - self.vars.remove(b) + self.variables.remove(b) except Exception as error: self.logger.warning(f"Caught the following exception but continuing : {error}") - self.meta_vars_percentage = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in self.meta_endings_prc - ] - self.meta_vars_share = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - 
for ending in self.meta_endings_share - ] + self.meta_vars_percentage = [ + biomass_var + ending + for biomass_var in self.biomass_vars + if biomass_var != "CO2" + for ending in self.meta_endings_prc + ] + self.meta_vars_share = [ + biomass_var + ending + for biomass_var in self.biomass_vars + if biomass_var != "CO2" + for ending in self.meta_endings_share + ] class CMIP6DownloaderConfig(AbstractDownloaderConfig): @@ -176,11 +186,48 @@ def __init__( self, project: str, data_dir: str = RAW_DATA, + model: Union[str, None] = "NorESM2-LM", experiments: list[str] = None, + ensemble_members: list[str] = None, # preferred ensemble members used, if None not considered + max_ensemble_members: int = 10, # if -1 take all variables: list[str] = None, overwrite: bool = False, logger: logging.Logger = LOGGER, ): super().__init__(project, data_dir, experiments, variables, overwrite, logger) + self.model: str = model self.avail_models = self.proj_constants.MODEL_SOURCES + self.ensemble_members: list[str] = ensemble_members + self.max_ensemble_members: int = max_ensemble_members + + +def match_project_key(input_key: str, key_list: list[str]) -> Union[str, None]: + for key in key_list: + if input_key.lower() == key.lower(): + return key + if input_key.upper() == key.upper(): + return key + return None + + +def _get_config_from_file(config_file, config_id, config_class, logger=LOGGER): + configs = get_yaml_config(config_file) + config_key = config_id + if config_key not in configs: + config_key = match_project_key(config_key, list(configs.keys())) + if not config_key: + logger.error(f"Config key [{config_id}] not found in config file [{config_file}]") + class_configs = configs[config_key] + config_object = config_class(project=config_id, **class_configs) + return config_object + + +def create_input4mips_downloader_config_from_file(config_file) -> Input4mipsDownloaderConfig: + config_object = _get_config_from_file(config_file, INPUT4MIPS, Input4mipsDownloaderConfig) + return 
config_object + + +def create_cmip6_downloader_config_from_file(config_file) -> CMIP6DownloaderConfig: + config_object = _get_config_from_file(config_file, CMIP6, CMIP6DownloaderConfig) + return config_object diff --git a/climateset/download/input4mips_downloader.py b/climateset/download/input4mips_downloader.py index e76a487..c58e7b2 100644 --- a/climateset/download/input4mips_downloader.py +++ b/climateset/download/input4mips_downloader.py @@ -1,12 +1,16 @@ -from abstract_downloader import AbstractDownloader from pyesgf.search import SearchConnection -from climateset.download.abstract_downloader_config import Input4mipsDownloaderConfig +from climateset.download.abstract_downloader import AbstractDownloader +from climateset.download.constants.esgf import INPUT4MIPS +from climateset.download.downloader_config import ( + Input4mipsDownloaderConfig, + create_input4mips_downloader_config_from_file, +) from climateset.download.utils import ( - _handle_base_search_constraints, download_metadata_variable, download_raw_input_variable, get_upload_version, + handle_base_search_constraints, ) from climateset.utils import create_logger @@ -16,11 +20,10 @@ class Input4MipsDownloader(AbstractDownloader): def __init__(self, config: Input4mipsDownloaderConfig): self.config: Input4mipsDownloaderConfig = config - self.raw_vars = "" self.logger = LOGGER def download(self): - for variable in self.raw_vars: + for variable in self.config.variables: if variable.endswith("openburning"): institution_id = "IAMC" else: @@ -45,7 +48,7 @@ def download(self): def download_raw_input_single_var( # noqa: C901 self, variable: str, - project: str = "input4mips", + project: str = INPUT4MIPS, institution_id: str = "PNNL-JGCRI", default_frequency: str = "mon", preferred_version: str = "latest", @@ -56,7 +59,7 @@ def download_raw_input_single_var( # noqa: C901 Args: variable: variable ID - project: umbrella project, here "input4mips" + project: umbrella project, here "input4MIPs" institution_id: id of 
the institution that provides the data default_frequency: default frequency to download preferred_version: data upload version, if 'latest', the newest version will get selected always @@ -65,16 +68,17 @@ def download_raw_input_single_var( # noqa: C901 self.logger.info("Using download_raw_input_single_var() function") facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" - conn = SearchConnection(url=self.config.node_link, distrib=False) + # Search context is sensitive to order and sequence, which is why + # it's done in different steps instead of putting everything in `new_context` + conn = SearchConnection(url=self.config.node_link, distrib=False) ctx = conn.new_context( project=project, variable=variable, institution_id=institution_id, facets=facets, ) - - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) mips_targets = list(ctx.facet_counts["target_mip"]) self.logger.info(f"Available target mips: {mips_targets}") @@ -89,6 +93,7 @@ def download_raw_input_single_var( # noqa: C901 self.logger.info(f"Result len {len(results)}") if len(results) > 0: download_raw_input_variable( + project=INPUT4MIPS, institution_id=institution_id, search_results=results, variable=variable, @@ -99,7 +104,7 @@ def download_meta_historic_biomassburning_single_var( self, variable: str, institution_id: str, - project: str = "input4mips", + project: str = INPUT4MIPS, default_grid_label: str = "gn", default_frequency: str = "mon", preferred_version: str = "latest", @@ -118,8 +123,11 @@ def download_meta_historic_biomassburning_single_var( variable_id = variable.replace("_", "-") variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}" self.logger.info(variable, variable_id, institution_id) - conn = SearchConnection(url=self.config.node_link, distrib=False) facets = "nominal_resolution,version" + + # Search context is 
sensitive to order and sequence, which is why + # it's done in different steps instead of putting everything in `new_context` + conn = SearchConnection(url=self.config.node_link, distrib=False) ctx = conn.new_context( project=project, variable=variable_search, @@ -128,8 +136,7 @@ def download_meta_historic_biomassburning_single_var( target_mip="CMIP", facets=facets, ) - - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) + ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) version = get_upload_version(context=ctx, preferred_version=preferred_version) if version: @@ -142,5 +149,15 @@ def download_meta_historic_biomassburning_single_var( self.logger.info(f"List of results :\n{result_list}") download_metadata_variable( - institution_id=institution_id, search_results=results, variable=variable, base_path=self.config.data_dir + project=INPUT4MIPS, + institution_id=institution_id, + search_results=results, + variable=variable, + base_path=self.config.data_dir, ) + + +def input4mips_download_from_config(config): + config_object = create_input4mips_downloader_config_from_file(config) + downloader = Input4MipsDownloader(config=config_object) + downloader.download() diff --git a/climateset/download/utils.py b/climateset/download/utils.py index a5632d8..f982485 100644 --- a/climateset/download/utils.py +++ b/climateset/download/utils.py @@ -5,7 +5,6 @@ import time from typing import Union -import pandas as pd import xarray as xr from climateset import RAW_DATA @@ -159,27 +158,29 @@ def _download_process(temp_download_path, search_results, logger: logging.Logger def download_raw_input_variable( - institution_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA + project, institution_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA ): if isinstance(base_path, str): base_path = pathlib.Path(base_path) - temp_download_path = base_path / 
f"raw_input_vars/{institution_id}/{variable}" + temp_download_path = base_path / f"{project}/raw_input_vars/{institution_id}/{variable}" _download_process(temp_download_path, search_results) -def download_model_variable(model_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA): +def download_model_variable( + project, model_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA +): if isinstance(base_path, str): base_path = pathlib.Path(base_path) - temp_download_path = base_path / f"model_vars/{model_id}/{variable}" + temp_download_path = base_path / f"{project}/{model_id}/{variable}" _download_process(temp_download_path, search_results) def download_metadata_variable( - institution_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA + project, institution_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA ): if isinstance(base_path, str): base_path = pathlib.Path(base_path) - temp_download_path = base_path / f"meta_vars/{institution_id}/{variable}" + temp_download_path = base_path / f"{project}/meta_vars/{institution_id}/{variable}" _download_process(temp_download_path, search_results) @@ -202,25 +203,6 @@ def get_grid_label(context, default_grid_label, logger=LOGGER): return grid_label -def get_max_ensemble_member_number(df_model_source: pd.DataFrame, experiments: list[str], model: str, logger=LOGGER): - if model is not None: - if model not in df_model_source["source_id"].tolist(): - logger.info(f"Model {model} not supported.") - raise AttributeError - model_id = df_model_source.index[df_model_source["source_id"] == model].values - # get ensemble members per scenario - max_ensemble_members_list = df_model_source["num_ensemble_members"][model_id].values.tolist()[0].split(" ") - scenarios = df_model_source["scenarios"][model_id].values.tolist()[0].split(" ") - max_ensemble_members_lookup = {} - for s, m in zip(scenarios, max_ensemble_members_list): - 
max_ensemble_members_lookup[s] = int(m) - filtered_experiments = (e for e in experiments if e != "historical") - max_possible_member_number = min( - max_ensemble_members_lookup[e] for e in filtered_experiments - ) # TODO fix historical - return max_possible_member_number - - def get_upload_version(context, preferred_version, logger=LOGGER): version = "" versions = [] @@ -262,7 +244,7 @@ def get_frequency(context, default_frequency, logger=LOGGER): return frequency -def _handle_base_search_constraints(ctx, default_frequency, default_grid_label): +def handle_base_search_constraints(ctx, default_frequency, default_grid_label): grid_label = get_grid_label(context=ctx, default_grid_label=default_grid_label) if grid_label: ctx = ctx.constrain(grid_label=grid_label) @@ -273,3 +255,12 @@ def _handle_base_search_constraints(ctx, default_frequency, default_grid_label): if frequency: ctx = ctx.constrain(frequency=frequency) return ctx + + +def handle_yaml_config_path(config_file_name, config_path): + if isinstance(config_path, str): + config_path = pathlib.Path(config_path) + if not config_file_name.endswith(".yaml"): + config_file_name = f"{config_file_name}.yaml" + config_full_path = config_path / config_file_name + return config_full_path diff --git a/configs/core_dataset.yaml b/configs/core_dataset.yaml index 89cca9c..e9ab512 100644 --- a/configs/core_dataset.yaml +++ b/configs/core_dataset.yaml @@ -1,9 +1,7 @@ CMIP6: - models: ["NorESM2-LM"] - downloader_kwargs: - variables: ["tas", "pr"] - experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] + model: "NorESM2-LM" + variables: ["tas", "pr"] + experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] input4MIPs: - downloader_kwargs: - variables: ["CO2", "BC", "SO2", "CH4"] - experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] + variables: ["CO2", "BC", "SO2", "CH4"] + experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] From 0ea3aae0a2e6b82bdbb782a9c87559c25466888e Mon 
Sep 17 00:00:00 2001 From: f-PLT Date: Tue, 25 Feb 2025 19:30:41 -0500 Subject: [PATCH 16/38] Update tests --- tests/resources/test_minimal_dataset.yaml | 11 +- tests/test_download/test_downloader.py | 121 +++++++++++++--------- 2 files changed, 80 insertions(+), 52 deletions(-) diff --git a/tests/resources/test_minimal_dataset.yaml b/tests/resources/test_minimal_dataset.yaml index 86bf9c0..2b99528 100644 --- a/tests/resources/test_minimal_dataset.yaml +++ b/tests/resources/test_minimal_dataset.yaml @@ -1,4 +1,7 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["tas", "CO2", "CH4"] - experiments: ["historical", "ssp126"] \ No newline at end of file +CMIP6: + model: "NorESM2-LM" + variables: ["tas"] + experiments: ["historical","ssp126"] +input4MIPs: + variables: ["CO2", "CH4"] + experiments: ["historical","ssp126"] \ No newline at end of file diff --git a/tests/test_download/test_downloader.py b/tests/test_download/test_downloader.py index 83a25ae..541f88a 100644 --- a/tests/test_download/test_downloader.py +++ b/tests/test_download/test_downloader.py @@ -1,18 +1,28 @@ +import shutil from unittest.mock import call, patch import pytest from climateset import TEST_DIR -from climateset.download.downloader import Downloader, download_from_config_file -from climateset.utils import get_yaml_config +from climateset.download.cmip6_downloader import CMIP6Downloader +from climateset.download.constants.esgf import CMIP6, INPUT4MIPS +from climateset.download.downloader import download_from_config_file +from climateset.download.downloader_config import ( + create_cmip6_downloader_config_from_file, + create_input4mips_downloader_config_from_file, +) +from climateset.download.input4mips_downloader import Input4MipsDownloader MINIMAL_DATASET_CONFIG_PATH = TEST_DIR / "resources/test_minimal_dataset.yaml" +TEST_TMP_DIR = TEST_DIR / "resources/.tmp" -DOWNLOAD_RAW_INPUT_SINGLE_VAR = "climateset.download.downloader.Downloader.download_raw_input_single_var" 
+DOWNLOAD_RAW_INPUT_SINGLE_VAR = ( + "climateset.download.input4mips_downloader.Input4MipsDownloader.download_raw_input_single_var" +) DOWNLOAD_META_HISTORIC_SINGLE_VAR = ( - "climateset.download.downloader.Downloader.download_meta_historic_biomassburning_single_var" + "climateset.download.input4mips_downloader.Input4MipsDownloader.download_meta_historic_biomassburning_single_var" ) -DOWNLOAD_MODEL_SINGLE_VAR = "climateset.download.downloader.Downloader.download_from_model_single_var" +DOWNLOAD_MODEL_SINGLE_VAR = "climateset.download.cmip6_downloader.CMIP6Downloader.download_from_model_single_var" SUBPROCESS_RUN = "subprocess.run" EXPECTED_MINIMAL_RAW_INPUT_CALLS = [ @@ -36,11 +46,15 @@ ] EXPECTED_MINIMAL_MODEL_CALLS = [ - call(variable="tas", experiment="historical"), - call(variable="tas", experiment="ssp126"), + call(project="CMIP6", variable="tas", experiment="historical"), + call(project="CMIP6", variable="tas", experiment="ssp126"), ] +def delete_tmp_dir(): + shutil.rmtree(TEST_TMP_DIR, ignore_errors=True) + + @pytest.fixture() def mock_raw_input_single_var(): with patch(DOWNLOAD_RAW_INPUT_SINGLE_VAR) as mock_function: @@ -66,38 +80,51 @@ def mock_subprocess_run(): @pytest.fixture -def simple_downloader_object(): +def input4mips_downloader_object(): config_source = MINIMAL_DATASET_CONFIG_PATH - config = get_yaml_config(config_source) - model = config["models"][0] - downloader_kwargs = config["downloader_kwargs"] - return Downloader(model=model, **downloader_kwargs) + config = create_input4mips_downloader_config_from_file(config_source) + config.data_dir = TEST_TMP_DIR + yield Input4MipsDownloader(config=config) + delete_tmp_dir() -def test_downloader_init(simple_downloader_object): - assert isinstance(simple_downloader_object, Downloader) +@pytest.fixture +def cmip6_downloader_object(): + config_source = MINIMAL_DATASET_CONFIG_PATH + config = create_cmip6_downloader_config_from_file(config_source) + config.data_dir = TEST_TMP_DIR + yield 
CMIP6Downloader(config=config) + delete_tmp_dir() + +def test_downloader_init(input4mips_downloader_object, cmip6_downloader_object): + assert isinstance(input4mips_downloader_object, Input4MipsDownloader) + assert isinstance(cmip6_downloader_object, CMIP6Downloader) -def test_downloader_base_params(simple_downloader_object): - assert simple_downloader_object.model == "NorESM2-LM" - assert simple_downloader_object.experiments == ["historical", "ssp126"] +def test_downloader_base_params(input4mips_downloader_object, cmip6_downloader_object): + assert input4mips_downloader_object.config.project == INPUT4MIPS + assert input4mips_downloader_object.config.experiments == ["historical", "ssp126"] + assert cmip6_downloader_object.config.project == CMIP6 + assert cmip6_downloader_object.config.model == ["NorESM2-LM"] + assert cmip6_downloader_object.config.experiments == ["historical", "ssp126"] -def test_downloader_max_possible_member_number(simple_downloader_object): - assert simple_downloader_object.max_ensemble_members == 1 +def test_downloader_max_possible_member_number(cmip6_downloader_object): + assert cmip6_downloader_object.config.max_ensemble_members == 10 -def test_downloader_variables(simple_downloader_object): - assert simple_downloader_object.raw_vars == [ + +def test_downloader_variables(input4mips_downloader_object, cmip6_downloader_object): + assert cmip6_downloader_object.config.variables == ["tas"] + assert input4mips_downloader_object.config.variables == [ "CO2_em_anthro", "CO2_em_AIR_anthro", "CH4_em_openburning", "CH4_em_anthro", "CH4_em_AIR_anthro", ] - assert simple_downloader_object.biomass_vars == ["CO2", "CH4"] - assert simple_downloader_object.model_vars == ["tas"] - assert simple_downloader_object.meta_vars_percentage == [ + assert input4mips_downloader_object.config.biomass_vars == ["CO2", "CH4"] + assert input4mips_downloader_object.config.meta_vars_percentage == [ "CH4_percentage_AGRI", "CH4_percentage_BORF", "CH4_percentage_DEFO", @@ -105,32 
+132,31 @@ def test_downloader_variables(simple_downloader_object): "CH4_percentage_SAVA", "CH4_percentage_TEMF", ] - assert simple_downloader_object.meta_vars_share == ["CH4_openburning_share"] + assert input4mips_downloader_object.config.meta_vars_share == ["CH4_openburning_share"] -def test_downloader_model_params(simple_downloader_object): - assert simple_downloader_object.model_node_link == "https://esgf-data.dkrz.de/esg-search" - assert simple_downloader_object.model_source_center == "NCC" +def test_downloader_model_params(cmip6_downloader_object): + assert cmip6_downloader_object.config.node_link == "http://esgf-node.llnl.gov/esg-search/" -def test_download_raw_input(simple_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var): - simple_downloader_object.download_raw_input() +def test_download_raw_input(input4mips_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var): + input4mips_downloader_object.download() assert mock_raw_input_single_var.call_args_list == EXPECTED_MINIMAL_RAW_INPUT_CALLS assert mock_raw_input_single_var.call_count == 8 assert mock_meta_historic_single_var.call_args_list == EXPECTED_MINIMAL_META_HISTORIC_CALLS assert mock_meta_historic_single_var.call_count == 6 -def test_download_from_model(simple_downloader_object, mock_model_single_var): - simple_downloader_object.download_from_model() +def test_download_from_model(cmip6_downloader_object, mock_model_single_var): + cmip6_downloader_object.download() assert mock_model_single_var.call_args_list == EXPECTED_MINIMAL_MODEL_CALLS assert mock_model_single_var.call_count == 2 def test_download_from_config_file( - simple_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var, mock_model_single_var + input4mips_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var, mock_model_single_var ): - download_from_config_file(config=MINIMAL_DATASET_CONFIG_PATH) + 
download_from_config_file(config_file=MINIMAL_DATASET_CONFIG_PATH) assert mock_raw_input_single_var.call_args_list == EXPECTED_MINIMAL_RAW_INPUT_CALLS assert mock_raw_input_single_var.call_count == 8 @@ -153,26 +179,25 @@ def _assert_content_is_in_wget_script(mock_call, string_content): assert string_content in wget_script_content -def test_download_raw_input_single_var(simple_downloader_object, mock_subprocess_run): +def test_download_raw_input_single_var(input4mips_downloader_object, mock_subprocess_run): download_subprocess = mock_subprocess_run - simple_downloader_object.download_raw_input_single_var(variable="CO2_em_anthro", institution_id="PNNL-JGCRI") + input4mips_downloader_object.download_raw_input_single_var(variable="CO2_em_anthro", institution_id="PNNL-JGCRI") expected_files = [ - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_175001-179912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_180001-184912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_185001-185012.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_185101-189912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_190001-194912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_195001-199912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_200001-201412.nc", + "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_175001-179912.nc", + "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_180001-184912.nc", + "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_185001-189912.nc", + "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_190001-194912.nc", + "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_195001-199912.nc", + "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_200001-202212.nc", ] download_subprocess.assert_called_once() for f in expected_files: 
_assert_content_is_in_wget_script(download_subprocess, f) -def test_download_meta_historic_biomassburning_single_var(simple_downloader_object, mock_subprocess_run): - simple_downloader_object.download_meta_historic_biomassburning_single_var( +def test_download_meta_historic_biomassburning_single_var(input4mips_downloader_object, mock_subprocess_run): + input4mips_downloader_object.download_meta_historic_biomassburning_single_var( variable="CH4_percentage_AGRI", institution_id="VUA" ) @@ -184,8 +209,8 @@ def test_download_meta_historic_biomassburning_single_var(simple_downloader_obje _assert_content_is_in_wget_script(mock_call=mock_subprocess_run, string_content=f) -def test_download_from_model_single_var(simple_downloader_object, mock_subprocess_run): - simple_downloader_object.download_from_model_single_var(variable="tas", experiment="ssp126") +def test_download_from_model_single_var(cmip6_downloader_object, mock_subprocess_run): + cmip6_downloader_object.download_from_model_single_var(variable="tas", experiment="ssp126") expected_files = [ "tas_Amon_NorESM2-LM_ssp126_r1i1p1f1_gn_201501-202012.nc", @@ -198,6 +223,6 @@ def test_download_from_model_single_var(simple_downloader_object, mock_subproces "tas_Amon_NorESM2-LM_ssp126_r1i1p1f1_gn_208101-209012.nc", "tas_Amon_NorESM2-LM_ssp126_r1i1p1f1_gn_209101-210012.nc", ] - mock_subprocess_run.assert_called_once() + mock_subprocess_run.assert_called() for f in expected_files: _assert_content_is_in_wget_script(mock_call=mock_subprocess_run, string_content=f) From 9df2456d776fced87bbdc6df27dbc571f2a8ad7b Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 26 Feb 2025 13:00:50 -0500 Subject: [PATCH 17/38] Refactor CMIP6Downloader for multiple models --- climateset/download/cmip6_downloader.py | 27 +++++------ climateset/download/downloader_config.py | 55 ++++++++++++++++++----- climateset/download/utils.py | 9 ++++ tests/resources/test_minimal_dataset.yaml | 4 +- tests/test_download/test_downloader.py | 13 +++--- 5 files 
changed, 73 insertions(+), 35 deletions(-) diff --git a/climateset/download/cmip6_downloader.py b/climateset/download/cmip6_downloader.py index 98c9d1e..9a1748b 100644 --- a/climateset/download/cmip6_downloader.py +++ b/climateset/download/cmip6_downloader.py @@ -38,23 +38,19 @@ def download(self): If the constraints cannot be met, per default behaviour for the downloader to select first other available value """ - - for variable in self.config.variables: - self.logger.info(f"Downloading data for variable: {variable}") - for experiment in self.config.experiments: - if experiment not in self.config.avail_experiments: - self.logger.info( - f"Chosen experiment {experiment} not supported. All supported experiments: " - f"{self.config.avail_experiments}. Skipping." + for model in self.config.models: + self.logger.info(f"Downloading data for model: [{model}]") + for variable in self.config.variables: + self.logger.info(f"Downloading data for variable: [{variable}]") + for experiment in self.config.experiments: + self.logger.info(f"Downloading data for experiment: [{experiment}]") + self.download_from_model_single_var( + model=model, project=self.config.project, variable=variable, experiment=experiment ) - continue - self.logger.info(f"Downloading data for experiment: {experiment}") - self.download_from_model_single_var( - project=self.config.project, variable=variable, experiment=experiment - ) def download_from_model_single_var( # noqa: C901 self, + model: str, variable: str, experiment: str, project: str = CMIP6, @@ -67,6 +63,7 @@ def download_from_model_single_var( # noqa: C901 (CMIP data). Args: + model (str): The model ID variable: variable ID experiment: experiment ID project: umbrella project id e.g. 
CMIPx @@ -86,7 +83,7 @@ def download_from_model_single_var( # noqa: C901 ctx = conn.new_context( project=project, experiment_id=experiment, - source_id=self.config.model, + source_id=model, variable=variable, facets=facets, ) @@ -139,7 +136,7 @@ def download_from_model_single_var( # noqa: C901 download_model_variable( project=CMIP6, - model_id=self.config.model, + model_id=model, search_results=results, variable=variable, base_path=self.config.data_dir, diff --git a/climateset/download/downloader_config.py b/climateset/download/downloader_config.py index cac97c8..dc3d00c 100644 --- a/climateset/download/downloader_config.py +++ b/climateset/download/downloader_config.py @@ -14,6 +14,7 @@ ESGF_PROJECTS_CONSTANTS, INPUT4MIPS, ) +from climateset.download.utils import match_key_in_list from climateset.utils import create_logger, get_yaml_config LOGGER = create_logger(__name__) @@ -59,6 +60,41 @@ def __init__( self.node_link = self.proj_constants.NODE_LINK self.avail_variables = self.proj_constants.VAR_SOURCE_LOOKUP self.avail_experiments = self.proj_constants.SUPPORTED_EXPERIMENTS + self.config_is_valid = True + + self._validate_item_list( + item_list=self.variables, available_items=self.avail_variables, name_of_item="variable" + ) + self._validate_item_list( + item_list=self.experiments, available_items=self.avail_experiments, name_of_item="experiment" + ) + + def _validate_item_list(self, item_list: list[str], available_items: list[str], name_of_item: str) -> None: + """ + This small function checks that the given items (variables, models, experiments, etc.) are valid for their given + project (Input4MIPs, CMIP6, etc.). + + Also remove unvalid items from the list of items as to not. + + Args: + item_list: List of items to check (like self.variables, self.experiments, etc.) + available_items: List of available items against which to check (like self.avail_variables, etc.) + name_of_item: Name of item to check. Write lowercase and singular: ie. 
variable, experiment, etc. + + Returns: + None + """ + error_in_item_list = False + for e in item_list: + if e not in available_items: + self.logger.error(f"{name_of_item.capitalize()} [{e}] not supported.") + item_list.remove(e) + error_in_item_list = True + if error_in_item_list: + self.logger.error(f"Some, or all submitted {name_of_item}s were not found found - Please verify") + self.logger.error(f"Available {name_of_item}s: {available_items}") + self.logger.warning(f"List of valid submitted {name_of_item}s: {available_items}") + self.config_is_valid = False @staticmethod def _handle_yaml_config_path(config_file_name, config_path): @@ -186,7 +222,7 @@ def __init__( self, project: str, data_dir: str = RAW_DATA, - model: Union[str, None] = "NorESM2-LM", + models: list[str] = None, experiments: list[str] = None, ensemble_members: list[str] = None, # preferred ensemble members used, if None not considered max_ensemble_members: int = 10, # if -1 take all @@ -196,26 +232,23 @@ def __init__( ): super().__init__(project, data_dir, experiments, variables, overwrite, logger) - self.model: str = model + if not models: + models = ["NorESM2-LM"] + if isinstance(models, str): + models = [models] + self.models: list[str] = models self.avail_models = self.proj_constants.MODEL_SOURCES self.ensemble_members: list[str] = ensemble_members self.max_ensemble_members: int = max_ensemble_members - -def match_project_key(input_key: str, key_list: list[str]) -> Union[str, None]: - for key in key_list: - if input_key.lower() == key.lower(): - return key - if input_key.upper() == key.upper(): - return key - return None + self._validate_item_list(item_list=self.models, available_items=self.avail_models, name_of_item="model") def _get_config_from_file(config_file, config_id, config_class, logger=LOGGER): configs = get_yaml_config(config_file) config_key = config_id if config_key not in configs: - config_key = match_project_key(config_key, list(configs.keys())) + config_key = 
match_key_in_list(config_key, list(configs.keys())) if not config_key: logger.error(f"Config key [{config_id}] not found in config file [{config_file}]") class_configs = configs[config_key] diff --git a/climateset/download/utils.py b/climateset/download/utils.py index f982485..7581a69 100644 --- a/climateset/download/utils.py +++ b/climateset/download/utils.py @@ -264,3 +264,12 @@ def handle_yaml_config_path(config_file_name, config_path): config_file_name = f"{config_file_name}.yaml" config_full_path = config_path / config_file_name return config_full_path + + +def match_key_in_list(input_key: str, key_list: list[str]) -> Union[str, None]: + for key in key_list: + if input_key.lower() == key.lower(): + return key + if input_key.upper() == key.upper(): + return key + return None diff --git a/tests/resources/test_minimal_dataset.yaml b/tests/resources/test_minimal_dataset.yaml index 2b99528..a09f942 100644 --- a/tests/resources/test_minimal_dataset.yaml +++ b/tests/resources/test_minimal_dataset.yaml @@ -1,7 +1,7 @@ CMIP6: - model: "NorESM2-LM" + models: ["NorESM2-LM"] variables: ["tas"] - experiments: ["historical","ssp126"] + experiments: ["ssp126"] input4MIPs: variables: ["CO2", "CH4"] experiments: ["historical","ssp126"] \ No newline at end of file diff --git a/tests/test_download/test_downloader.py b/tests/test_download/test_downloader.py index 541f88a..0fd6374 100644 --- a/tests/test_download/test_downloader.py +++ b/tests/test_download/test_downloader.py @@ -46,8 +46,7 @@ ] EXPECTED_MINIMAL_MODEL_CALLS = [ - call(project="CMIP6", variable="tas", experiment="historical"), - call(project="CMIP6", variable="tas", experiment="ssp126"), + call(model="NorESM2-LM", project="CMIP6", variable="tas", experiment="ssp126"), ] @@ -106,8 +105,8 @@ def test_downloader_base_params(input4mips_downloader_object, cmip6_downloader_o assert input4mips_downloader_object.config.project == INPUT4MIPS assert input4mips_downloader_object.config.experiments == ["historical", "ssp126"] 
assert cmip6_downloader_object.config.project == CMIP6 - assert cmip6_downloader_object.config.model == ["NorESM2-LM"] - assert cmip6_downloader_object.config.experiments == ["historical", "ssp126"] + assert cmip6_downloader_object.config.models == ["NorESM2-LM"] + assert cmip6_downloader_object.config.experiments == ["ssp126"] def test_downloader_max_possible_member_number(cmip6_downloader_object): @@ -150,7 +149,7 @@ def test_download_raw_input(input4mips_downloader_object, mock_raw_input_single_ def test_download_from_model(cmip6_downloader_object, mock_model_single_var): cmip6_downloader_object.download() assert mock_model_single_var.call_args_list == EXPECTED_MINIMAL_MODEL_CALLS - assert mock_model_single_var.call_count == 2 + assert mock_model_single_var.call_count == 1 def test_download_from_config_file( @@ -163,7 +162,7 @@ def test_download_from_config_file( assert mock_meta_historic_single_var.call_args_list == EXPECTED_MINIMAL_META_HISTORIC_CALLS assert mock_meta_historic_single_var.call_count == 6 assert mock_model_single_var.call_args_list == EXPECTED_MINIMAL_MODEL_CALLS - assert mock_model_single_var.call_count == 2 + assert mock_model_single_var.call_count == 1 def _assert_content_is_in_wget_script(mock_call, string_content): @@ -210,7 +209,7 @@ def test_download_meta_historic_biomassburning_single_var(input4mips_downloader_ def test_download_from_model_single_var(cmip6_downloader_object, mock_subprocess_run): - cmip6_downloader_object.download_from_model_single_var(variable="tas", experiment="ssp126") + cmip6_downloader_object.download() expected_files = [ "tas_Amon_NorESM2-LM_ssp126_r1i1p1f1_gn_201501-202012.nc", From df3233606c5998b3ca86510d174d772f7177db8f Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 26 Feb 2025 13:01:07 -0500 Subject: [PATCH 18/38] Cleanup of downloader.py file --- climateset/download/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/climateset/download/__init__.py b/climateset/download/__init__.py index 
e69de29..b5bc4e2 100644 --- a/climateset/download/__init__.py +++ b/climateset/download/__init__.py @@ -0,0 +1 @@ +from .downloader import download_from_config_file # noqa F401 From 1f2ff661938eb0aa716949a8a27cce3109755b34 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 26 Feb 2025 13:01:19 -0500 Subject: [PATCH 19/38] Cleanup of downloader.py file --- climateset/download/downloader.py | 509 +----------------------------- 1 file changed, 4 insertions(+), 505 deletions(-) diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index c2fd516..5ceebc5 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -1,522 +1,21 @@ -import logging import pathlib from typing import Union -from pyesgf.search import SearchConnection - -from climateset import RAW_DATA from climateset.download.cmip6_downloader import CMIP6Downloader -from climateset.download.constants.esgf import ( - CMIP6, - ESGF_MODEL_OUTPUT_LIST, - ESGF_PROJECTS_CONSTANTS, - INPUT4MIPS, -) +from climateset.download.constants.esgf import CMIP6, INPUT4MIPS from climateset.download.downloader_config import ( AVAILABLE_CONFIGS, create_cmip6_downloader_config_from_file, create_input4mips_downloader_config_from_file, - match_project_key, ) from climateset.download.input4mips_downloader import Input4MipsDownloader -from climateset.download.utils import ( - download_metadata_variable, - download_model_variable, - download_raw_input_variable, - get_upload_version, - handle_base_search_constraints, -) +from climateset.download.utils import match_key_in_list from climateset.utils import create_logger, get_yaml_config LOGGER = create_logger(__name__) -class Downloader: - """ - Class handling the downloading of the data. - - It communicates with the esgf nodes to search and download the specified data. 
- """ - - def __init__( # noqa: C901 - self, - project: str = "CMIP6", # default as in ClimateBench - model: Union[str, None] = "NorESM2-LM", # default as in ClimateBench - experiments: list[str] = None, # sub-selection of ClimateBench default - variables: list[str] = None, - data_dir: str = RAW_DATA, - max_ensemble_members: int = 10, # if -1 take all - ensemble_members: list[str] = None, # preferred ensemble members used, if None not considered - overwrite: bool = False, # flag if files should be overwritten - download_biomassburning: bool = True, # get biomassburning data for input4mips - download_metafiles: bool = True, # get input4mips meta files - use_plain_emission_vars: bool = True, # specifies if plain variables are given and rest is inferred - logger: logging.Logger = LOGGER, - ): - """ - Init method for the Downloader. - - Args: - project (str): Which categorie the data belongs to. Can be: CMIP6, CMIP6Plus, E3SM, input4mips, obs4mips, and more. - To date, only CMIP6, and input4mips are supported. - model: Model ID from which output should be downloaded. A list of all supported model ids can - be found in parameters.constants.MODEL_SOURCES. Model data only. - experiments: List of simulations from which data should be downloaded. Model data only. - experiments: List of variables for which data should be downloaded. Both model and raw data. - data_dir: Relative or absolute path to the directory where data should be stored. Will be created - if not yet existent. - meta_dir: Relative or absolute path to the directory where the metadata should be sored. Will be - created if not yet existent. - overwrite: Flag if files should be overwritten, if they already exist. - download_biomassburning: Flag if biomassburning data for input4mips variables should be downloaded. - download_metafiles: Flag if metafiles for input4mips variables should be downloaded. 
- """ - # Args init for - ## (all) - self.logger = logger - self.project: str = project - self.data_dir: Union[str, pathlib.Path] = data_dir - self.overwrite: bool = overwrite - ## (climate model output) (e.g. cmip6) - self.model: str = model - self.experiments: list[str] = experiments - self.ensemble_members: list[str] = ensemble_members - self.max_ensemble_members: int = max_ensemble_members - ## (climate model input) (e.g. input4mips) - self.download_metafiles: bool = download_metafiles # TODO infer automatically from vars - self.download_biomass_burning: bool = download_biomassburning # TODO infer automatically from vars - self.use_plain_emission_vars: bool = use_plain_emission_vars # TODO infer automatically from vars - # ESGF project constants for - ## (all) - self.node_link: str = None - self.avail_variables: list[str] = None - self.avail_experiments: list[str] = None - ## (climate model output) - self.avail_models: list[str] = None - ## (climate model input) - self.emissions_endings: list[str] = None - self.meta_endings_prc: list[str] = None - self.meta_endings_share: list[str] = None - self.mip_area: str = None - self.target_mip: str = None - # Attributes that are going to be retrieved / set within this class for - ## (all) - self.vars: list[str] = variables - ## (climate model inputs) - self.biomass_vars: list[str] = [] - self.meta_vars_percentage: list[str] = [] - self.meta_vars_share: list[str] = [] - - self._init_project_constants() - - # if max ensemble member number is too large --> we are relying on the server to complain? - - # adapt variables in case of input4mips - if self.project == "input4MIPs": - self._handle_emission_variables( - variables=variables, - ) - - self._check_desired_params() - - # @Francis (JK) I am still not happy about this function. 
Please let me know if I can improve this bit - def _init_project_constants(self): - """Assign/init attributed depending on the project.""" - if self.project not in ESGF_PROJECTS_CONSTANTS: - self.logger.info(f"Project {self.project} has not been implemented in the Downloader yet.") - raise ValueError( - f"Project {self.project} is not recognized. Consider adding a constant class in download/constants and the esgf.py file." - ) - proj_constants = ESGF_PROJECTS_CONSTANTS[self.project] - - # init shared constants - self.node_link = proj_constants.NODE_LINK - self.avail_variables = proj_constants.VAR_SOURCE_LOOKUP - self.avail_experiments = proj_constants.SUPPORTED_EXPERIMENTS - - # init climate model output constants - if self.project in ESGF_MODEL_OUTPUT_LIST: - self.avail_models = proj_constants.MODEL_SOURCES - - # init input4mips constants - if self.project == "input4MIPs": - self.emissions_endings = proj_constants.EMISSIONS_ENDINGS - self.meta_endings_prc = proj_constants.META_ENDINGS_PRC - self.meta_endings_share = proj_constants.META_ENDINGS_SHAR - self.mip_area = proj_constants.MIP_ERA - self.target_mip = proj_constants.TARGET_MIP - - def _check_desired_params(self): - """Check if the desired params exist.""" - # check model - if (self.model is not None) and (self.model not in self.avail_models): - self.logger.warning(f"Model {self.model} unknown.") - raise ValueError( - f"Model {self.model} is not in the list of supported models. Check for typos and consider adding it manually." - ) - - # check experiments - for exp in self.experiments: - if exp not in self.avail_experiments: - self.logger.warning(f"Experiment {exp} unknown.") - raise ValueError( - f"Experiment {exp} is not in the list of supported experiments. Check for typos and consider adding it manually." 
- ) - - # check variables - for var in self.vars: - if var not in self.avail_variables: - self.logger.warning(f"Variable {var} unknown.") - raise ValueError( - f"Variable {var} is not in the list of supported variables. Check for typos and consider adding it manually." - ) - - # check variables - def _handle_emission_variables(self, variables: list[str]): - if self.project == "input4MIPs": - self.vars = [] - self._generate_raw_emission_vars(variables=variables) - self._generate_plain_emission_vars() - self.logger.info(f"Emission variables to download: {self.vars}") - if self.download_biomass_burning: - self.logger.info(f"Biomass burning vars to download: {self.biomass_vars}") - if self.download_metafiles: - self.logger.info( - f"Meta emission vars to download:\n\t{self.meta_vars_percentage}\n\t{self.meta_vars_share}" - ) - - # def _check_models(self): - # # check if model, variable, and experiment exists - # try: - # self.node_link = self.avail_models[self.model]["node_link"] - # except KeyError: - # if self.model is not None: - # self.logger.info(f"WARNING: Model {self.model} unknown.") - # raise ValueError( - # "Model {} is not in the list of supported models. 
Consider adding manually to esgf_server.py".format( - # self.model - # ) - # ) - # self.node_link = self.avail_models[self.model]["node_link"] - - def _generate_plain_emission_vars(self): - if self.use_plain_emission_vars: - # plain vars are biomass vars - self.biomass_vars = self.vars - self.meta_vars_percentage = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in self.meta_endings_prc - ] - self.meta_vars_share = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in self.meta_endings_share - ] - - self.vars = [ - variable + emission_ending for variable in self.vars for emission_ending in self.emissions_endings - ] - # be careful with CO2 - if "CO2_em_openburning" in self.vars: - self.vars.remove("CO2_em_openburning") - else: - # get plain input4mips vars = biomass vars for historical - self.biomass_vars = list({v.split("_")[0] for v in self.vars}) - # remove biomass vars from normal vars list - for b in self.biomass_vars: - try: - self.vars.remove(b) - except Exception as error: - self.logger.warning(f"Caught the following exception but continuing : {error}") - - self.meta_vars_percentage = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in self.meta_endings_prc - ] - self.meta_vars_share = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in self.meta_endings_share - ] - - def _generate_raw_emission_vars(self, variables: list[str]): - if variables is None: - # variables = ["tas", "pr", "SO2_em_anthro", "BC_em_anthro"] - raise ValueError("No variables have been given to the downloader. 
Variables must be given for downloader.") - variables = [v.replace(" ", "_").replace("-", "_") for v in variables] - self.logger.info(f"Cleaned variables : {variables}") - for v in variables: - self.vars.append(v) - - # - # Class functions - # - def download_from_model_single_var( # noqa: C901 - self, - variable: str, - experiment: str, - project: str = "CMIP6", - default_frequency: str = "mon", - preferred_version: str = "latest", - default_grid_label: str = "gn", - ): - """ - Function handling the download of a single variable-experiment pair that is associated with a model's output - (CMIP data). - - Args: - variable: variable ID - experiment: experiment ID - project: umbrella project id e.g. CMIPx - default_frequency: default frequency to download - preferred_version: data upload version, if 'latest', the newest version will get selected always - default_grid_label: default gridding method in which the data is provided - """ - - facets = ( - "project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, " - "version, grid_label, experiment_id" - ) - - self.logger.info("Using download_from_model_single_var() function") - - # Search context is sensitive to order and sequence, which is why - # it's done in different steps instead of putting everything in `new_context` - conn = SearchConnection(url=self.node_link, distrib=False) - ctx = conn.new_context( - project=project, - experiment_id=experiment, - source_id=self.model, - variable=variable, - facets=facets, - ) - - ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - variants = list(ctx.facet_counts["variant_label"]) - - if len(variants) < 1: - self.logger.info( - "No items were found for this request. Please check on the esgf server if the combination of your model/scenarios/variables exists." 
- ) - raise ValueError( - "Downloader did not find any items on esgf for your request with: Project {project}, Experiment {experiment}, Model {self.model}, Variable {variable}." - ) - - self.logger.info(f"Available variants : {variants}\n") - self.logger.info(f"Length : {len(variants)}") - - # TODO refactor logic of if/else - if not self.ensemble_members: - if self.max_ensemble_members > len(variants): - self.logger.info("Less ensemble members available than maximum number desired. Including all variants.") - ensemble_member_final_list = variants - else: - self.logger.info( - f"{len(variants)} ensemble members available than desired (max {self.max_ensemble_members}. " - f"Choosing only the first {self.max_ensemble_members}.)." - ) - ensemble_member_final_list = variants[: self.max_ensemble_members] - else: - self.logger.info(f"Desired list of ensemble members given: {self.ensemble_members}") - ensemble_member_final_list = list(set(variants) & set(self.ensemble_members)) - if len(ensemble_member_final_list) == 0: - self.logger.info("WARNING: no overlap between available and desired ensemble members!") - self.logger.info("Skipping.") - return None - - for ensemble_member in ensemble_member_final_list: - self.logger.info(f"Ensembles member: {ensemble_member}") - ctx_ensemble = ctx.constrain(variant_label=ensemble_member) - - version = get_upload_version(context=ctx, preferred_version=preferred_version) - if version: - ctx_ensemble = ctx_ensemble.constrain(version=version) - - results = ctx_ensemble.search() - - self.logger.info(f"Result len {len(results)}") - - download_model_variable( - model_id=self.model, search_results=results, variable=variable, base_path=self.data_dir - ) - - def download_raw_input_single_var( # noqa: C901 - self, - variable: str, - project: str = "input4mips", - institution_id: str = "PNNL-JGCRI", - default_frequency: str = "mon", - preferred_version: str = "latest", - default_grid_label: str = "gn", - ): - """ - Function handling the download 
of all input4mips data associated with a single variable. - - Args: - variable: variable ID - project: umbrella project, here "input4mips" - institution_id: id of the institution that provides the data - default_frequency: default frequency to download - preferred_version: data upload version, if 'latest', the newest version will get selected always - default_grid_label: default gridding method in which the data is provided - """ - self.logger.info("Using download_raw_input_single_var() function") - - facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" - - # Search context is sensitive to order and sequence, which is why - # it's done in different steps instead of putting everything in `new_context` - conn = SearchConnection(url=self.node_link, distrib=False) - ctx = conn.new_context( - project=project, - variable=variable, - institution_id=institution_id, - facets=facets, - ) - ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - mips_targets = list(ctx.facet_counts["target_mip"]) - self.logger.info(f"Available target mips: {mips_targets}") - - for target in mips_targets: - ctx_target = ctx.constrain(target_mip=target) - version = get_upload_version(context=ctx_target, preferred_version=preferred_version) - if version: - ctx_target = ctx_target.constrain(version=version) - - results = ctx_target.search() - self.logger.info(f"Result len {len(results)}") - if len(results) > 0: - download_raw_input_variable( - institution_id=institution_id, search_results=results, variable=variable, base_path=self.data_dir - ) - - def download_meta_historic_biomassburning_single_var( - self, - variable: str, - institution_id: str, - project: str = "input4mips", - default_grid_label: str = "gn", - default_frequency: str = "mon", - preferred_version: str = "latest", - ): - """ - Function handling the download of all metadata associated with a single input4mips variable. 
- - Args: - variable: variable ID - project: umbrella project - institution_id: id of the institution that provides the data - default_grid_label: default gridding method in which the data is provided - default_frequency: default frequency to download - preferred_version: data upload version, if 'latest', the newest version will get selected always - """ - variable_id = variable.replace("_", "-") - variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}" - self.logger.info(variable, variable_id, institution_id) - facets = "nominal_resolution,version" - - # Search context is sensitive to order and sequence, which is why - # it's done in different steps instead of putting everything in `new_context` - conn = SearchConnection(url=self.node_link, distrib=False) - ctx = conn.new_context( - project=project, - variable=variable_search, - variable_id=variable_id, - institution_id=institution_id, - target_mip="CMIP", - facets=facets, - ) - ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - version = get_upload_version(context=ctx, preferred_version=preferred_version) - if version: - ctx = ctx.constrain(version=version) - - results = ctx.search() - self.logger.info(f"Result len {len(results)}") - - result_list = [r.file_context().search() for r in results] - self.logger.info(f"List of results :\n{result_list}") - - download_metadata_variable( - institution_id=institution_id, search_results=results, variable=variable, base_path=self.data_dir - ) - - def download_from_model(self): - """ - Function handling the download of all variables that are associated with a model's output. - - Searches for all files associated with the respected variables and experiment that the downloader - was initialized with. - - A search connection is established and the search is iteratively constraint to meet all specifications. - Data is downloaded and stored in a separate file for each year. The default format is netCDF4. 
- - Resulting hierarchy: - - `CMIPx/model_id/ensemble_member/experiment/variable/nominal_resolution/frequency/year.nc` - - If the constraints cannot be met, per default behaviour for the downloader to select first other - available value - """ - for variable in self.vars: - self.logger.info(f"Downloading data for variable: {variable}") - for experiment in self.experiments: - if experiment not in self.avail_experiments: - self.logger.info( - f"Chosen experiment {experiment} not supported. All supported experiments: " - f"{self.avail_experiments}. Skipping." - ) - continue - self.logger.info(f"Downloading data for experiment: {experiment}") - self.download_from_model_single_var(project=self.project, variable=variable, experiment=experiment) - - def download_raw_input(self): - """ - Function handling the download of all variables that are associated with a model's input (input4mips). - - Searches for all files associated with the respected variables that the downloader was initialized with. - A search connection is established and the search is iteratively constraint to meet all specifications. - Data is downloaded and stored in a separate file for each year. The default format is netCDF4. - - Resulting hierarchy: - - `input4mips/experiment/variable/nominal_resolution/frequency/year.nc` - - If the constraints cannot be met, the default behaviour for the downloader is to select first other - available value. 
- """ - for variable in self.vars: - institution_id = "PNNL-JGCRI" - if variable.endswith("openburning"): - institution_id = "IAMC" - self.logger.info(f"Downloading data for variable: {variable}") - self.download_raw_input_single_var(variable=variable, institution_id=institution_id) - - if self.download_biomass_burning & ("historical" in self.experiments): - for variable in self.biomass_vars: - self.logger.info(f"Downloading biomassburing data for variable: {variable}") - self.download_raw_input_single_var(variable=variable, institution_id="VUA") - - if self.download_metafiles: - for variable in self.meta_vars_percentage: - # percentage are historic and have no scenarios - self.logger.info(f"Downloading meta percentage data for variable: {variable}") - self.download_meta_historic_biomassburning_single_var(variable=variable, institution_id="VUA") - for variable in self.meta_vars_share: - self.logger.info(f"Downloading meta openburning share data for variable: {variable}") - self.download_raw_input_single_var(variable=variable, institution_id="IAMC") - - -def download_from_config_file(config_file: Union[str, pathlib.Path], logger: logging.Logger = LOGGER): +def download_from_config_file(config_file: Union[str, pathlib.Path]): """ This function downloads variables automatically from input config file Args: @@ -534,7 +33,7 @@ def download_from_config_file(config_file: Union[str, pathlib.Path], logger: log verified_config_keys = [] for config_key in config_dict: - verified_key = match_project_key(input_key=config_key, key_list=AVAILABLE_CONFIGS) + verified_key = match_key_in_list(input_key=config_key, key_list=AVAILABLE_CONFIGS) if verified_key: verified_config_keys.append(verified_key) From 7789c1b722c468186e2172691db872aaa038c753 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 26 Feb 2025 23:28:38 -0500 Subject: [PATCH 20/38] Update all download config files --- configs/core_dataset.yaml | 8 ++++---- .../cmip6/awi_sea-ice-thickness_control-1950.yaml | 7 +++---- 
configs/downloader/cmip6/canesm_co2_ssp.yaml | 5 ++--- configs/downloader/cmip6/fgoals_tas_ssp.yaml | 5 ++--- configs/downloader/cmip6/noresm_pr_historical.yaml | 7 +++---- configs/downloader/cmip6/noresm_tas_ssp.yaml | 7 +++---- configs/downloader/cmip6/ukesm_tas_picontrol.yaml | 5 ++--- configs/downloader/future_usecases/cmip7.yaml | 5 ++--- configs/downloader/future_usecases/e3sm.yaml | 5 ++--- configs/downloader/future_usecases/obs4mips.yaml | 5 ++--- configs/downloader/input4mips/bc_historical.yaml | 5 ++--- configs/downloader/input4mips/bc_ssp.yaml | 5 ++--- configs/downloader/input4mips/ch4_historical.yaml | 5 ++--- configs/downloader/input4mips/ch4_ssp.yaml | 5 ++--- configs/downloader/input4mips/co2_historical.yaml | 5 ++--- configs/downloader/input4mips/co2_ssp.yaml | 5 ++--- configs/downloader/input4mips/so2_historical.yaml | 5 ++--- configs/downloader/input4mips/so2_ssp.yaml | 5 ++--- configs/minimal_dataset.yaml | 14 ++++++-------- 19 files changed, 47 insertions(+), 66 deletions(-) diff --git a/configs/core_dataset.yaml b/configs/core_dataset.yaml index e9ab512..21ad438 100644 --- a/configs/core_dataset.yaml +++ b/configs/core_dataset.yaml @@ -1,7 +1,7 @@ CMIP6: model: "NorESM2-LM" - variables: ["tas", "pr"] - experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] + variables: [ "tas", "pr" ] + experiments: [ "historical","ssp126", "ssp245", "ssp370", "ssp585" ] input4MIPs: - variables: ["CO2", "BC", "SO2", "CH4"] - experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] + variables: [ "CO2", "BC", "SO2", "CH4" ] + experiments: [ "historical","ssp126", "ssp245", "ssp370", "ssp585" ] diff --git a/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml b/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml index 81ca1d5..7a323b7 100644 --- a/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml +++ b/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml @@ -1,5 +1,4 @@ CMIP6: - models: 
["AWI-CM-1-1-HR"] - downloader_kwargs: - variables: ["sithick"] - experiments: ["control-1950"] \ No newline at end of file + models: [ "AWI-CM-1-1-HR" ] + variables: [ "sithick" ] + experiments: [ "control-1950" ] \ No newline at end of file diff --git a/configs/downloader/cmip6/canesm_co2_ssp.yaml b/configs/downloader/cmip6/canesm_co2_ssp.yaml index 4f33a2c..ebe7823 100644 --- a/configs/downloader/cmip6/canesm_co2_ssp.yaml +++ b/configs/downloader/cmip6/canesm_co2_ssp.yaml @@ -1,5 +1,4 @@ CMIP6: models: ["CanESM5"] - downloader_kwargs: - variables: ["co2"] - experiments: ["abrupt-2xCO2"] \ No newline at end of file + variables: [ "co2" ] + experiments: [ "abrupt-2xCO2" ] \ No newline at end of file diff --git a/configs/downloader/cmip6/fgoals_tas_ssp.yaml b/configs/downloader/cmip6/fgoals_tas_ssp.yaml index b571e01..dfec4c3 100644 --- a/configs/downloader/cmip6/fgoals_tas_ssp.yaml +++ b/configs/downloader/cmip6/fgoals_tas_ssp.yaml @@ -1,5 +1,4 @@ CMIP6: models: ["FGOALS-g3"] - downloader_kwargs: - variables: ["tas"] - experiments: ["ssp245"] \ No newline at end of file + variables: [ "tas" ] + experiments: [ "ssp245" ] diff --git a/configs/downloader/cmip6/noresm_pr_historical.yaml b/configs/downloader/cmip6/noresm_pr_historical.yaml index 2bb6f72..968a9e1 100644 --- a/configs/downloader/cmip6/noresm_pr_historical.yaml +++ b/configs/downloader/cmip6/noresm_pr_historical.yaml @@ -1,6 +1,5 @@ CMIP6: models: ["NorESM2-LM"] - downloader_kwargs: - variables: ["pr"] - experiments: ["historical"] - max_ensemble_members: 1 \ No newline at end of file + variables: [ "pr" ] + experiments: [ "historical" ] + max_ensemble_members: 1 \ No newline at end of file diff --git a/configs/downloader/cmip6/noresm_tas_ssp.yaml b/configs/downloader/cmip6/noresm_tas_ssp.yaml index 858eb4f..c8f605c 100644 --- a/configs/downloader/cmip6/noresm_tas_ssp.yaml +++ b/configs/downloader/cmip6/noresm_tas_ssp.yaml @@ -1,6 +1,5 @@ CMIP6: models: ["NorESM2-LM"] - downloader_kwargs: - variables: 
["tas"] - experiments: ["ssp245"] - ensemble_members: ["r9i1p1f2", "r8i1p1f2"] \ No newline at end of file + variables: [ "tas" ] + experiments: [ "ssp245" ] + ensemble_members: [ "r9i1p1f2", "r8i1p1f2" ] \ No newline at end of file diff --git a/configs/downloader/cmip6/ukesm_tas_picontrol.yaml b/configs/downloader/cmip6/ukesm_tas_picontrol.yaml index 29aaf5c..60339d8 100644 --- a/configs/downloader/cmip6/ukesm_tas_picontrol.yaml +++ b/configs/downloader/cmip6/ukesm_tas_picontrol.yaml @@ -1,5 +1,4 @@ CMIP6: models: ["UKESM1-0-LL"] - downloader_kwargs: - variables: ["tas"] - experiments: ["piControl"] + variables: [ "tas" ] + experiments: [ "piControl" ] diff --git a/configs/downloader/future_usecases/cmip7.yaml b/configs/downloader/future_usecases/cmip7.yaml index dfc3dd6..100460e 100644 --- a/configs/downloader/future_usecases/cmip7.yaml +++ b/configs/downloader/future_usecases/cmip7.yaml @@ -1,5 +1,4 @@ CMIP6Plus: models: ["HasGEM3-GC31-LL"] - downloader_kwargs: - variables: ["mrsofc"] - experiments: ["hist-lu"] \ No newline at end of file + variables: [ "mrsofc" ] + experiments: [ "hist-lu" ] diff --git a/configs/downloader/future_usecases/e3sm.yaml b/configs/downloader/future_usecases/e3sm.yaml index d659326..16e9bdb 100644 --- a/configs/downloader/future_usecases/e3sm.yaml +++ b/configs/downloader/future_usecases/e3sm.yaml @@ -1,4 +1,3 @@ E3SM: - downloader_kwargs: - variables: ["???"] - experiments: ["ssp585"] \ No newline at end of file + variables: [ "???" 
] + experiments: [ "ssp585" ] \ No newline at end of file diff --git a/configs/downloader/future_usecases/obs4mips.yaml b/configs/downloader/future_usecases/obs4mips.yaml index e671118..9441903 100644 --- a/configs/downloader/future_usecases/obs4mips.yaml +++ b/configs/downloader/future_usecases/obs4mips.yaml @@ -1,5 +1,4 @@ #ESACCI-CLOUD-ATSR2-AATSR-3-0: obs4MIPs: - downloader_kwargs: - variables: ["pctCCI"] - experiments: [""] \ No newline at end of file + variables: ["pctCCI"] + experiments: [""] \ No newline at end of file diff --git a/configs/downloader/input4mips/bc_historical.yaml b/configs/downloader/input4mips/bc_historical.yaml index 74463b3..194b790 100644 --- a/configs/downloader/input4mips/bc_historical.yaml +++ b/configs/downloader/input4mips/bc_historical.yaml @@ -1,4 +1,3 @@ input4MIPs: - downloader_kwargs: - variables: ["BC"] - experiments: ["historical"] \ No newline at end of file + variables: [ "BC" ] + experiments: [ "historical" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/bc_ssp.yaml b/configs/downloader/input4mips/bc_ssp.yaml index 107573d..2d88ff6 100644 --- a/configs/downloader/input4mips/bc_ssp.yaml +++ b/configs/downloader/input4mips/bc_ssp.yaml @@ -1,4 +1,3 @@ input4MIPs: - downloader_kwargs: - variables: ["BC"] - experiments: ["ssp585"] \ No newline at end of file + variables: [ "BC" ] + experiments: [ "ssp585" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/ch4_historical.yaml b/configs/downloader/input4mips/ch4_historical.yaml index f18bfee..00784f7 100644 --- a/configs/downloader/input4mips/ch4_historical.yaml +++ b/configs/downloader/input4mips/ch4_historical.yaml @@ -1,4 +1,3 @@ input4MIPs: - downloader_kwargs: - variables: ["CH4"] - experiments: ["historical"] \ No newline at end of file + variables: [ "CH4" ] + experiments: [ "historical" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/ch4_ssp.yaml b/configs/downloader/input4mips/ch4_ssp.yaml index 
21f1f79..e003e77 100644 --- a/configs/downloader/input4mips/ch4_ssp.yaml +++ b/configs/downloader/input4mips/ch4_ssp.yaml @@ -1,4 +1,3 @@ input4MIPs: - downloader_kwargs: - variables: ["CH4"] - experiments: ["ssp245"] \ No newline at end of file + variables: [ "CH4" ] + experiments: [ "ssp245" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/co2_historical.yaml b/configs/downloader/input4mips/co2_historical.yaml index 15dbe54..28a7f8f 100644 --- a/configs/downloader/input4mips/co2_historical.yaml +++ b/configs/downloader/input4mips/co2_historical.yaml @@ -1,4 +1,3 @@ input4MIPs: - downloader_kwargs: - variables: ["CO2"] - experiments: ["historical"] \ No newline at end of file + variables: [ "CO2" ] + experiments: [ "historical" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/co2_ssp.yaml b/configs/downloader/input4mips/co2_ssp.yaml index 48d367f..31a0377 100644 --- a/configs/downloader/input4mips/co2_ssp.yaml +++ b/configs/downloader/input4mips/co2_ssp.yaml @@ -1,4 +1,3 @@ input4MIPs: - downloader_kwargs: - variables: ["C02"] - experiments: ["ssp460"] \ No newline at end of file + variables: [ "C02" ] + experiments: [ "ssp460" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/so2_historical.yaml b/configs/downloader/input4mips/so2_historical.yaml index f2d2e6f..9be5c8d 100644 --- a/configs/downloader/input4mips/so2_historical.yaml +++ b/configs/downloader/input4mips/so2_historical.yaml @@ -1,4 +1,3 @@ input4MIPs: - downloader_kwargs: - variables: ["SO2"] - experiments: ["historical"] \ No newline at end of file + variables: [ "SO2" ] + experiments: [ "historical" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/so2_ssp.yaml b/configs/downloader/input4mips/so2_ssp.yaml index 55436c6..683b597 100644 --- a/configs/downloader/input4mips/so2_ssp.yaml +++ b/configs/downloader/input4mips/so2_ssp.yaml @@ -1,4 +1,3 @@ input4MIPs: - downloader_kwargs: - variables: ["SO2"] - 
experiments: ["ssp370"] \ No newline at end of file + variables: [ "SO2" ] + experiments: [ "ssp370" ] \ No newline at end of file diff --git a/configs/minimal_dataset.yaml b/configs/minimal_dataset.yaml index 742d043..cfe9371 100644 --- a/configs/minimal_dataset.yaml +++ b/configs/minimal_dataset.yaml @@ -1,9 +1,7 @@ CMIP6: - models: ["NorESM2-LM"] - downloader_kwargs: - variables: ["tas", "CH4", "CO2"] - experiments: ["historical", "ssp126"] - max_ensemble_members: 1 - overwrite: true - start_year: 1990 - end_year: 2030 \ No newline at end of file + models: [ "NorESM2-LM" ] + variables: [ "tas" ] + experiments: [ "historical", "ssp126" ] +input4MIPs: + variables: [ "CO2", "CH4" ] + experiments: [ "historical","ssp126" ] \ No newline at end of file From ecf8a416056df39a04e0cb28388ea2c00652db5c Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 26 Feb 2025 23:28:52 -0500 Subject: [PATCH 21/38] Add download example --- scripts/download_example.py | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 scripts/download_example.py diff --git a/scripts/download_example.py b/scripts/download_example.py new file mode 100644 index 0000000..733fa67 --- /dev/null +++ b/scripts/download_example.py @@ -0,0 +1,71 @@ +import typer + +from climateset import CONFIGS +from climateset.download import download_from_config_file, downloader_config +from climateset.download.cmip6_downloader import CMIP6Downloader +from climateset.download.input4mips_downloader import Input4MipsDownloader + +app = typer.Typer(no_args_is_help=True) + +CONFIG_PATH = CONFIGS / "minimal_dataset.yaml" + + +@app.command( + name="download-basic", + help="Download ClimateSet data by building the config objects. See function content for more details.", +) +def basic_download(): + """ + By default, will download to the DATA_DIR folder. You can override this behavior modifying the config objects or by + adding the `data_dir` key in the config file under each project. + + ex. 
+ CMIP6: + models: [ "NorESM2-LM" ] + variables: [ "tas" ] + experiments: [ "historical", "ssp126" ] + data_dir: "PATH/TO/DATA_DIR" + input4MIPs: + variables: [ "CO2", "CH4" ] + experiments: [ "historical","ssp126" ] + data_dir: "PATH/TO/DATA_DIR" + """ + input4mips_config = downloader_config.create_input4mips_downloader_config_from_file(CONFIG_PATH) + cmip6_config = downloader_config.create_cmip6_downloader_config_from_file(CONFIG_PATH) + + # If you want to specify where data will be downloaded, change the following: + # input4mips_config.data_dir = "PATH_TO_DATA_DIR" + # cmip6_config.data_dir = "PATH_TO_DATA_DIR" + + input4mips_downloader = Input4MipsDownloader(input4mips_config) + input4mips_downloader.download() + + cmip6_downloader = CMIP6Downloader(cmip6_config) + cmip6_downloader.download() + + +@app.command( + name="download-from-config", + help="Download ClimateSet data via download_from_config_file() function. See function content for more details.", +) +def alternative_approach(): + """ + By default, will download to the DATA_DIR folder. You can override this behavior by adding the `data_dir` key in the + config file under each project. + + ex. 
+ CMIP6: + models: [ "NorESM2-LM" ] + variables: [ "tas" ] + experiments: [ "historical", "ssp126" ] + data_dir: "PATH/TO/DATA_DIR" + input4MIPs: + variables: [ "CO2", "CH4" ] + experiments: [ "historical","ssp126" ] + data_dir: "PATH/TO/DATA_DIR" + """ + download_from_config_file(CONFIG_PATH) + + +if __name__ == "__main__": + app() From 260952dd3e4a8232545f8f905b1431ae6808bfa3 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 26 Feb 2025 23:41:58 -0500 Subject: [PATCH 22/38] Update download_from_config_file() to use existing functions for each Downloader --- climateset/download/downloader.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index 5ceebc5..c0dbf67 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -1,21 +1,18 @@ +import logging import pathlib from typing import Union -from climateset.download.cmip6_downloader import CMIP6Downloader +from climateset.download.cmip6_downloader import cmip6_download_from_config from climateset.download.constants.esgf import CMIP6, INPUT4MIPS -from climateset.download.downloader_config import ( - AVAILABLE_CONFIGS, - create_cmip6_downloader_config_from_file, - create_input4mips_downloader_config_from_file, -) -from climateset.download.input4mips_downloader import Input4MipsDownloader +from climateset.download.downloader_config import AVAILABLE_CONFIGS +from climateset.download.input4mips_downloader import input4mips_download_from_config from climateset.download.utils import match_key_in_list from climateset.utils import create_logger, get_yaml_config LOGGER = create_logger(__name__) -def download_from_config_file(config_file: Union[str, pathlib.Path]): +def download_from_config_file(config_file: Union[str, pathlib.Path], logger: logging.Logger = LOGGER): """ This function downloads variables automatically from input config file Args: @@ -27,8 +24,8 @@ def 
download_from_config_file(config_file: Union[str, pathlib.Path]): config_dict = get_yaml_config(config_file) downloader_factory = { - INPUT4MIPS: {"configs": create_input4mips_downloader_config_from_file, "downloader": Input4MipsDownloader}, - CMIP6: {"configs": create_cmip6_downloader_config_from_file, "downloader": CMIP6Downloader}, + INPUT4MIPS: input4mips_download_from_config, + CMIP6: cmip6_download_from_config, } verified_config_keys = [] @@ -36,8 +33,11 @@ def download_from_config_file(config_file: Union[str, pathlib.Path]): verified_key = match_key_in_list(input_key=config_key, key_list=AVAILABLE_CONFIGS) if verified_key: verified_config_keys.append(verified_key) + else: + logger.error( + f"Input project [{config_key}] from [{config_file}]was not found in available projects. " + "Removing it from download list" + ) for config_key in verified_config_keys: - configs = downloader_factory[config_key]["configs"](config_file=config_file) - downloader = downloader_factory[config_key]["downloader"](config=configs) - downloader.download() + downloader_factory[config_key](config=config_file) From 1017f14ad763efd0f183a91abd12113c00900ce6 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 26 Feb 2025 23:44:33 -0500 Subject: [PATCH 23/38] Fix Pylint errors --- climateset/download/cmip6_downloader.py | 6 ++++-- climateset/download/downloader_config.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/climateset/download/cmip6_downloader.py b/climateset/download/cmip6_downloader.py index 9a1748b..d4a77da 100644 --- a/climateset/download/cmip6_downloader.py +++ b/climateset/download/cmip6_downloader.py @@ -94,10 +94,12 @@ def download_from_model_single_var( # noqa: C901 if len(variants) < 1: self.logger.info( - "No items were found for this request. Please check on the esgf server if the combination of your model/scenarios/variables exists." + "No items were found for this request. 
Please check on the esgf server if the combination of your " + "model/scenarios/variables exists." ) raise ValueError( - "Downloader did not find any items on esgf for your request with: Project {project}, Experiment {experiment}, Model {self.model}, Variable {variable}." + f"Downloader did not find any items on esgf for your request with: Project {project}, " + f"Experiment {experiment}, Model {model}, Variable {variable}." ) self.logger.info(f"Available variants : {variants}\n") diff --git a/climateset/download/downloader_config.py b/climateset/download/downloader_config.py index dc3d00c..0926b3d 100644 --- a/climateset/download/downloader_config.py +++ b/climateset/download/downloader_config.py @@ -117,7 +117,7 @@ def generate_config_dict(self): def generate_config_file(self, config_file_name: str, config_path: Union[str, Path] = CONFIGS) -> None: config_full_path = self._handle_yaml_config_path(config_file_name, config_path) data = self.generate_config_dict() - with open(config_full_path, "w") as config_file: + with open(config_full_path, "w", encoding="utf-8") as config_file: yaml.dump(data, config_file, indent=2) def add_to_config_file(self, config_file_name: str, config_path: Union[str, Path] = CONFIGS) -> None: @@ -128,7 +128,7 @@ def add_to_config_file(self, config_file_name: str, config_path: Union[str, Path existing_config.update(existing_config) new_config = self.generate_config_dict() existing_config.update(new_config) - with open(config_full_path, "w") as config_file: + with open(config_full_path, "w", encoding="utf-8") as config_file: yaml.dump(existing_config, config_file, indent=2) From 445115f1219a7f36d0272c92f203e54262d407e8 Mon Sep 17 00:00:00 2001 From: liellnima Date: Thu, 27 Feb 2025 18:57:57 +0100 Subject: [PATCH 24/38] fix typo --- configs/downloader/input4mips/co2_ssp.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/downloader/input4mips/co2_ssp.yaml b/configs/downloader/input4mips/co2_ssp.yaml index 
31a0377..1646a9d 100644 --- a/configs/downloader/input4mips/co2_ssp.yaml +++ b/configs/downloader/input4mips/co2_ssp.yaml @@ -1,3 +1,3 @@ input4MIPs: - variables: [ "C02" ] + variables: [ "CO2" ] experiments: [ "ssp460" ] \ No newline at end of file From 51614eb88ba79e45b1f5e6a34b987f3e17df371b Mon Sep 17 00:00:00 2001 From: liellnima Date: Thu, 27 Feb 2025 18:58:42 +0100 Subject: [PATCH 25/38] update minimal usecase config and add ocean configs for future --- configs/downloader/future_usecases/noresm_ocean.yaml | 5 +++++ configs/minimal_dataset.yaml | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 configs/downloader/future_usecases/noresm_ocean.yaml diff --git a/configs/downloader/future_usecases/noresm_ocean.yaml b/configs/downloader/future_usecases/noresm_ocean.yaml new file mode 100644 index 0000000..76b58d6 --- /dev/null +++ b/configs/downloader/future_usecases/noresm_ocean.yaml @@ -0,0 +1,5 @@ +OMIP: + models: ["NorESM2-LM"] + variables: [ "omldamax" ] # daily-maximum ocean mixed-layer thickness (NOTE: not sea-surface temperature, which is "tos" and is often used as forcing for atmo models) + experiments: [ "omip1" ] + ensemble_members: [ "r1i1p1f1" ] \ No newline at end of file diff --git a/configs/minimal_dataset.yaml b/configs/minimal_dataset.yaml index cfe9371..f53da4f 100644 --- a/configs/minimal_dataset.yaml +++ b/configs/minimal_dataset.yaml @@ -2,6 +2,8 @@ CMIP6: models: [ "NorESM2-LM" ] variables: [ "tas" ] experiments: [ "historical", "ssp126" ] + max_ensemble_members: 1 + ensemble_members: ["r2i1p1f1"] input4MIPs: - variables: [ "CO2", "CH4" ] + variables: [ "CH4", "CO2" ] experiments: [ "historical","ssp126" ] \ No newline at end of file From 30e3969d56a25498e335f4cd04ba575d3130c557 Mon Sep 17 00:00:00 2001 From: liellnima Date: Thu, 27 Feb 2025 18:59:48 +0100 Subject: [PATCH 26/38] add ocean constants for future use cases, can be ignored rn --- climateset/download/constants/omip.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644
climateset/download/constants/omip.py diff --git a/climateset/download/constants/omip.py b/climateset/download/constants/omip.py new file mode 100644 index 0000000..abcdb10 --- /dev/null +++ b/climateset/download/constants/omip.py @@ -0,0 +1,22 @@ +class OmipConstants: + """ + Attributes: + NODE_LINK (str): Where the data can be accessed + MODEL_SOURCES (List): Identifiers for supported climate models + VAR_SOURCE_LOOKUP (List): model and raw variables + SUPPORTED_EXPERIMENTS (list): experiments of climate models (runs) that are supported + """ + + NODE_LINK = "http://esgf-data2.llnl.gov" + + MODEL_SOURCES = [ + "NorESM2-LM", + ] + + VAR_SOURCE_LOOKUP = [ + "omldamax", + ] + + SUPPORTED_EXPERIMENTS = [ + "omip1", + ] From 2c341dc3588d5724efe0c9da71aaca9a39544830 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Fri, 23 May 2025 16:06:11 -0400 Subject: [PATCH 27/38] Update with new QA tools and new Makefile version --- .make/base.make | 553 ++++++++++++++++------- .pre-commit-config.yaml | 12 +- Makefile.private.example | 10 + climateset/download/downloader_config.py | 4 +- noxfile.py | 60 ++- poetry.lock | 73 ++- pyproject.toml | 96 +++- 7 files changed, 612 insertions(+), 196 deletions(-) diff --git a/.make/base.make b/.make/base.make index 9342369..3f38651 100644 --- a/.make/base.make +++ b/.make/base.make @@ -3,7 +3,7 @@ # If necessary, override the corresponding variable and/or target, or create new ones # in one of the following files, depending on the nature of the override : # -# Makefile.variables, Makefile.targets or Makefile.private`, +# Makefile.variables, Makefile.targets or Makefile.private, # # The only valid reason to modify this file is to fix a bug or to add new # files to include.
@@ -16,7 +16,7 @@ PROJECT_PATH := $(dir $(abspath $(firstword $(MAKEFILE_LIST)))) MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) SHELL := /usr/bin/env bash BUMP_TOOL := bump-my-version -MAKEFILE_VERSION := 0.2.0 +MAKEFILE_VERSION := 0.5.0 DOCKER_COMPOSE ?= docker compose AUTO_INSTALL ?= @@ -24,6 +24,12 @@ AUTO_INSTALL ?= # CONDA_TOOL can be overridden in Makefile.private file CONDA_TOOL := conda CONDA_ENVIRONMENT ?= +CONDA_YES_OPTION ?= + +# Default environment to install package +# Can be overridden in Makefile.private file +DEFAULT_INSTALL_ENV ?= +DEFAULT_POETRY_INSTALL_ENV ?= # Colors _SECTION := \033[1m\033[34m @@ -31,6 +37,11 @@ _TARGET := \033[36m _NORMAL := \033[0m .DEFAULT_GOAL := help + +# Project and Private variables and targets import to override variables for local +# This is to make sure, sometimes the Makefile includes don't work. +-include Makefile.variables +-include Makefile.private ## -- Informative targets ------------------------------------------------------------------------------------------- ## .PHONY: all @@ -69,167 +80,153 @@ targets: help version: ## display current version @echo "version: $(APP_VERSION)" -## -- Conda targets ------------------------------------------------------------------------------------------------- ## +## -- Virtualenv targets -------------------------------------------------------------------------------------------- ## -.PHONY: conda-install -conda-install: ## Install Conda on your local machine - @echo "Looking for [$(CONDA_TOOL)]..."; \ - $(CONDA_TOOL) --version; \ - if [ $$? 
!= "0" ]; then \ - echo " "; \ - echo "Your defined Conda tool [$(CONDA_TOOL)] has not been found."; \ - echo " "; \ - echo "If you know you already have [$(CONDA_TOOL)] or some other Conda tool installed,"; \ - echo "Check your [CONDA_TOOL] variable in the Makefile.private for typos."; \ - echo " "; \ - echo "If your conda tool has not been initiated through your .bashrc file,"; \ - echo "consider using the full path to its executable instead when"; \ - echo "defining your [CONDA_TOOL] variable"; \ - echo " "; \ - echo "If in doubt, don't install Conda and manually create and activate"; \ - echo "your own Python environment."; \ - echo " "; \ - echo -n "Would you like to install Miniconda ? [y/N]: "; \ - read ans; \ - case $$ans in \ - [Yy]*) \ - echo "Fetching and installing miniconda"; \ - echo " "; \ - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh; \ - bash ~/miniconda.sh -b -p $${HOME}/.conda; \ - export PATH=$${HOME}/.conda/bin:$$PATH; \ - conda init; \ - /usr/bin/rm ~/miniconda.sh; \ - ;; \ - *) \ - echo "Skipping installation."; \ - echo " "; \ - ;; \ - esac; \ - else \ - echo "Conda tool [$(CONDA_TOOL)] has been found, skipping installation"; \ - fi; +VENV_PATH := $(PROJECT_PATH).venv +VENV_ACTIVATE := $(VENV_PATH)/bin/activate -.PHONY: conda-create-env -conda-create-env: conda-install ## Create a local Conda environment based on `environment.yml` file - @$(CONDA_TOOL) env create -f environment.yml +.PHONY: venv-create +venv-create: ## Create a virtualenv '.venv' at the root of the project folder + @virtualenv $(VENV_PATH) + @make -s venv-activate -.PHONY: conda-env-info -conda-env-info: ## Print information about active Conda environment using - @$(CONDA_TOOL) info +.PHONY: venv-activate +venv-activate: ## Print out the shell command to activate the project's virtualenv. 
+ @echo "source $(VENV_ACTIVATE)" -.PHONY: _conda-poetry-install -_conda-poetry-install: - $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) $(CONDA_TOOL) install -c conda-forge poetry; \ - CURRENT_VERSION=$$(poetry --version | awk '{print $$NF}' | tr -d ')'); \ - REQUIRED_VERSION="1.6.0"; \ - if [ "$$(printf '%s\n' "$$REQUIRED_VERSION" "$$CURRENT_VERSION" | sort -V | head -n1)" != "$$REQUIRED_VERSION" ]; then \ - echo "Poetry installed version $$CURRENT_VERSION is less than minimal version $$REQUIRED_VERSION, fixing urllib3 version to prevent problems"; \ - poetry add "urllib3<2.0.0"; \ - fi; +## -- Poetry targets ------------------------------------------------------------------------------------------------ ## -.PHONY:conda-poetry-install -conda-poetry-install: ## Install Poetry in currently active Conda environment. Will fail if Conda is not found +.PHONY: poetry-install-auto +poetry-install-auto: ## Install Poetry in Conda environment, or with pipx in a virtualenv if Conda not found @poetry --version; \ if [ $$? != "0" ]; then \ echo "Poetry not found, proceeding to install Poetry..."; \ - echo "Looking for [$(CONDA_TOOL)]...";\ - $(CONDA_TOOL) --version; \ - if [ $$? != "0" ]; then \ - echo "$(CONDA_TOOL) not found; Poetry will not be installed"; \ - else \ - echo "Installing Poetry with Conda in [$(CONDA_ENVIRONMENT)] environment"; \ - make -s _conda-poetry-install; \ + if [ "$(DEFAULT_POETRY_INSTALL_ENV)" == "conda" ]; then \ + ans_where="conda"; \ + elif [ "$(DEFAULT_POETRY_INSTALL_ENV)" == "venv" ]; then \ + ans_where="venv"; \ + else\ + echo -n "Where would you like to install Poetry, in a dedicated virtualenv (venv), or a conda environment? 
[venv/conda]: "; \ + read ans_where; \ + fi; \ fi; \ + case $$ans_where in \ + "venv" | "Venv" |"VENV") \ + make AUTO_INSTALL=true -s poetry-install-venv; \ + ;; \ + "conda" | "Conda" | "CONDA") \ + echo "Installing poetry with Conda"; \ + make AUTO_INSTALL=true -s conda-poetry-install; \ + ;; \ + *) \ + echo ""; \ + echo -e "\e[1;39;41m-- WARNING --\e[0m Option $$ans_where not found, exiting process."; \ + echo ""; \ + exit 1; \ + esac; \ fi; -.PHONY: conda-poetry-uninstall -conda-poetry-uninstall: ## Uninstall Poetry located in currently active Conda environment - $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) $(CONDA_TOOL) remove poetry - -.PHONY: conda-clean-env -conda-clean-env: ## Completely removes local project's Conda environment - $(CONDA_TOOL) env remove -n $(CONDA_ENVIRONMENT) +.PHONY: _pipx_install_poetry +_pipx_install_poetry: + @output="$$(pip install poetry --dry-run)"; \ + if echo "$$output" | grep -q computecanada ; then \ + echo ""; \ + echo -e "\e[1;39;41m-- WARNING --\e[0m Compute Canada (DRAC) environment detected: Installing Poetry < 2.0.0"; \ + echo ""; \ + pipx install 'poetry<2.0.0' ; \ + else \ + pipx install poetry ; \ + fi; -## -- Poetry targets ------------------------------------------------------------------------------------------------ ## -.PHONY: poetry-install-auto -poetry-install-auto: ## Install Poetry in activated Conda environment, or with pipx if Conda not found - @poetry --version; \ - if [ $$? != "0" ]; then \ - echo "Poetry not found, proceeding to install Poetry..."; \ - echo "Looking for [$(CONDA_TOOL)]...";\ - $(CONDA_TOOL) --version; \ - if [ $$? != "0" ]; then \ - echo "$(CONDA_TOOL) not found, trying with pipx"; \ - pipx --version; \ +.PHONY: poetry-install +poetry-install: ## Install standalone Poetry using pipx. Will ask where to install pipx. + @echo "Looking for Poetry version...";\ + poetry --version; \ + if [ $$?
!= "0" ]; then \ + if [ "$(AUTO_INSTALL)" = "true" ]; then \ + ans="y";\ + else \ + echo "Poetry not found..."; \ + echo "Looking for pipx version...";\ + pipx_found=0; \ + pipx --version; \ if [ $$? != "0" ]; then \ - echo "pipx not found; installing pipx"; \ - pip install --user pipx; \ - pipx ensurepath; \ + pipx_found=1; \ + echo "pipx not found..."; \ + echo""; \ + echo -n "Would you like to install pipx and Poetry? [y/N]: "; \ + else \ + echo""; \ + echo -n "Would you like to install Poetry using pipx? [y/N]: "; \ fi; \ - pipx install poetry; \ + read ans; \ + fi; \ + case $$ans in \ + [Yy]*) \ + if [ $$pipx_found == "1" ]; then \ + echo""; \ + echo -e "\e[1;39;41m-- WARNING --\e[0m The following pip has been found and will be used to install pipx: "; \ + echo " -> "$$(which pip); \ + echo""; \ + echo "If you do not have write permission to that environment, using it to install pipx will fail."; \ + echo "If this is the case, you should install pipx using a virtual one."; \ + echo""; \ + echo "See documentation for more information."; \ + echo""; \ + echo -n "Would you like to use the local available pip above, or create virtual environment to install pipx? [local/virtual]: "; \ + read ans_how; \ + case $$ans_how in \ + "LOCAL" | "Local" |"local") \ + make -s poetry-install-local; \ + ;; \ + "VIRTUAL" | "Virtual" | "virtual") \ + make -s poetry-install-venv; \ + ;; \ + *) \ + echo ""; \ + echo -e "\e[1;39;41m-- WARNING --\e[0m Option $$ans_how not found, exiting process."; \ + echo ""; \ + exit 1; \ + esac; \ else \ - echo "Installing poetry with Conda"; \ - make -s _conda-poetry-install; \ + echo "Installing Poetry"; \ + make -s _pipx_install_poetry; \ fi; \ - fi; - -.PHONY: poetry-install -poetry-install: ## Install standalone Poetry using pipx and create Poetry env. Will install pipx if not found - @echo "Looking for Poetry version...";\ - poetry --version; \ - if [ $$? 
!= "0" ]; then \ - if [ "$(AUTO_INSTALL)" = "true" ]; then \ - ans="y";\ - else \ - echo "Looking for pipx version...";\ - pipx --version; \ - if [ $$? != "0" ]; then \ - echo""; \ - echo -e "\e[1;39;41m-- WARNING --\e[0m The following pip has been found and will be used to install pipx: "; \ - echo " -> "$$(which pip); \ - echo""; \ - echo "If you do not have write permission to that environment, you will need to either activate"; \ - echo "a different environment, or create a virtual one (ex. venv) to install pipx into it."; \ - echo "See documentation for more information."; \ - echo""; \ - echo "Alternatively, the [make poetry-install-venv] target can also be used"; \ - echo""; \ - echo -n "Would you like to install pipx and Poetry? [y/N]: "; \ - else \ - echo""; \ - echo -n "Would you like to install Poetry using pipx? [y/N]: "; \ - fi; \ - read ans; \ - fi; \ - case $$ans in \ - [Yy]*) \ - pipx --version; \ - if [ $$? != "0" ]; then \ - echo "pipx not found; installing pipx"; \ - pip install --user pipx || pip install pipx; \ - pipx ensurepath; \ - fi; \ - echo "Installing Poetry"; \ - pipx install poetry; \ - make -s poetry-create-env; \ - ;; \ - *) \ - echo "Skipping installation."; \ - echo " "; \ - ;; \ - esac; \ - fi; + ;; \ + *) \ + echo "Skipping installation."; \ + echo " "; \ + ;; \ + esac; \ + fi; +PIPX_VENV_PATH := $$HOME/.pipx_venv .PHONY: poetry-install-venv -poetry-install-venv: ## Install standalone Poetry and Poetry environment. Will install pipx in $HOME/.pipx_venv - @echo "Creating virtual environment using venv here : [$$HOME/.pipx_venv]" - @python3 -m venv $$HOME/.pipx_venv - @echo "Activating virtual environment [$$HOME/.pipx_venv]" - @source $$HOME/.pipx_venv/bin/activate - @pip3 install pipx - @make -s poetry-install +poetry-install-venv: ## Install standalone Poetry. Will install pipx in $HOME/.pipx_venv + @pipx --version; \ + if [ $$? 
!= "0" ]; then \ + echo "Creating virtual environment using venv here : [$(PIPX_VENV_PATH)]"; \ + python3 -m venv $(PIPX_VENV_PATH); \ + echo "Activating virtual environment [$(PIPX_VENV_PATH)]"; \ + source $(PIPX_VENV_PATH)/bin/activate; \ + pip3 install pipx; \ + pipx ensurepath; \ + source $(PIPX_VENV_PATH)/bin/activate && make -s _pipx_install_poetry ; \ + else \ + make -s _pipx_install_poetry ; \ + fi; + +.PHONY: poetry-install-local +poetry-install-local: ## Install standalone Poetry. Will install pipx with locally available pip. + @pipx --version; \ + if [ $$? != "0" ]; then \ + echo "pipx not found; installing pipx"; \ + pip3 install pipx; \ + pipx ensurepath; \ + fi; + @echo "Installing Poetry" + @make -s _pipx_install_poetry + .PHONY: poetry-env-info poetry-env-info: ## Information about the currently active environment used by Poetry @@ -247,6 +244,10 @@ poetry-create-env: ## Create a Poetry managed environment for the project (Outsi @echo "Use and for more information" @echo"" +.PHONY: poetry-activate +poetry-activate: ## Print the shell command to activate the project's poetry env. + poetry env activate + .PHONY: poetry-remove-env poetry-remove-env: ## Remove current project's Poetry managed environment. @if [ "$(AUTO_INSTALL)" = "true" ]; then \ @@ -255,6 +256,7 @@ poetry-remove-env: ## Remove current project's Poetry managed environment. env_name=$$(basename $$env_path); \ else \ echo""; \ + echo "Looking for poetry environments..."; \ env_path=$$(poetry env info -p); \ if [[ "$$env_path" != "" ]]; then \ echo "The following environment has been found for this project: "; \ @@ -266,11 +268,15 @@ poetry-remove-env: ## Remove current project's Poetry managed environment. 
echo "If the active environment listed above is a Conda environment,"; \ echo "Choosing to delete it will have no effect; use the target "; \ echo""; \ + echo""; \ + echo "If the active environment listed above is a venv environment,"; \ + echo "Choosing to delete it will have no effect; use the bash command $ rm -rf "; \ + echo""; \ echo -n "Would you like delete the environment listed above? [y/N]: "; \ read ans_env; \ else \ - env_name="None"; \ - env_path="None"; \ + env_name="None"; \ + env_path="None"; \ fi; \ fi; \ if [[ $$env_name != "None" ]]; then \ @@ -282,6 +288,8 @@ poetry-remove-env: ## Remove current project's Poetry managed environment. echo "No environment was found/provided - skipping environment deletion"; \ ;;\ esac; \ + else \ + echo "No environments were found... skipping environment deletion"; \ fi; \ .PHONY: poetry-uninstall @@ -295,7 +303,15 @@ poetry-uninstall: poetry-remove-env ## Uninstall pipx-installed Poetry and the c fi; \ case $$ans in \ [Yy]*) \ - pipx uninstall poetry; \ + pipx --version ; \ + if [ $$? != "0" ]; then \ + echo "" ; \ + echo "Pipx not found globally, trying with $(PIPX_VENV_PATH) env" ;\ + echo "" ; \ + source $(PIPX_VENV_PATH)/bin/activate && pipx uninstall poetry ; \ + else \ + pipx uninstall poetry ; \ + fi; \ ;; \ *) \ echo "Skipping uninstallation."; \ @@ -324,53 +340,253 @@ poetry-uninstall-pipx: poetry-remove-env ## Uninstall pipx-installed Poetry, the esac; \ .PHONY: poetry-uninstall-venv -poetry-uninstall-venv: ## Uninstall pipx-installed Poetry, the created Poetry environment, pipx and $HOME/.pipx_venv - @python3 -m venv $$HOME/.pipx_venv - @source $$HOME/.pipx_venv/bin/activate - @make -s poetry-uninstall-pipx +poetry-uninstall-venv: poetry-remove-env ## Uninstall pipx-installed Poetry, the created Poetry environment, pipx and $HOME/.pipx_venv + @if [ "$(AUTO_INSTALL)" = "true" ]; then \ + ans="y";\ + else \ + echo""; \ + echo -n "Would you like to uninstall pipx-installed Poetry and pipx? 
[y/N]: "; \ + read ans; \ + fi; \ + case $$ans in \ + [Yy]*) \ + (source $(PIPX_VENV_PATH)/bin/activate && pipx uninstall poetry); \ + (source $(PIPX_VENV_PATH)/bin/activate && pip uninstall -y pipx); \ + ;; \ + *) \ + echo "Skipping uninstallation."; \ + echo " "; \ + ;; \ + esac; \ + @if [ "$(AUTO_INSTALL)" = "true" ]; then \ ans="y";\ else \ echo""; \ - echo -n "Would you like to remove the virtual environment located here : [$$HOME/.pipx_venv] ? [y/N]: "; \ + echo -n "Would you like to remove the virtual environment located here : [$(PIPX_VENV_PATH)] ? [y/N]: "; \ read ans; \ fi; \ case $$ans in \ [Yy]*) \ - rm -r $$HOME/.pipx_venv; \ + rm -r $(PIPX_VENV_PATH); \ ;; \ *) \ - echo "Skipping [$$HOME/.pipx_venv] virtual environment removal."; \ + echo "Skipping [$(PIPX_VENV_PATH)] virtual environment removal."; \ echo ""; \ ;; \ esac; \ -## -- Install targets (All install targets will install Poetry if not found using `make poetry-install-auto`)-------- ## +## -- Conda targets ------------------------------------------------------------------------------------------------- ## + +.PHONY: conda-install +conda-install: ## Install Conda on your local machine + @echo "Looking for [$(CONDA_TOOL)]..."; \ + $(CONDA_TOOL) --version; \ + if [ $$? != "0" ]; then \ + echo " "; \ + echo "Your defined Conda tool [$(CONDA_TOOL)] has not been found."; \ + echo " "; \ + echo "If you know you already have [$(CONDA_TOOL)] or some other Conda tool installed,"; \ + echo "Check your [CONDA_TOOL] variable in the Makefile.private for typos."; \ + echo " "; \ + echo "If your conda tool has not been initiated through your .bashrc file,"; \ + echo "consider using the full path to its executable instead when"; \ + echo "defining your [CONDA_TOOL] variable"; \ + echo " "; \ + echo "If in doubt, don't install Conda and manually create and activate"; \ + echo "your own Python environment."; \ + echo " "; \ + echo -n "Would you like to install Miniconda ? 
[y/N]: "; \ + read ans; \ + case $$ans in \ + [Yy]*) \ + echo "Fetching and installing miniconda"; \ + echo " "; \ + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh; \ + bash ~/miniconda.sh -b -p $${HOME}/.conda; \ + export PATH=$${HOME}/.conda/bin:$$PATH; \ + conda init; \ + /usr/bin/rm ~/miniconda.sh; \ + ;; \ + *) \ + echo "Skipping installation."; \ + echo " "; \ + ;; \ + esac; \ + else \ + echo "Conda tool [$(CONDA_TOOL)] has been found, skipping installation"; \ + fi; + +.PHONY: conda-create-env +conda-create-env: conda-install ## Create a local Conda environment based on 'environment.yml' file + @$(CONDA_TOOL) env create $(CONDA_YES_OPTION) -f environment.yml + +.PHONY: conda-env-info +conda-env-info: ## Print information about active Conda environment using + @$(CONDA_TOOL) info + +.PHONY: conda-activate +conda-activate: ## Print the shell command to activate the project's Conda env. + @echo "$(CONDA_TOOL) activate $(CONDA_ENVIRONMENT)" + +.PHONY: _conda-poetry-install +_conda-poetry-install: + @$(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) python --version; \ + if [ $$? != "0" ]; then \ + echo "Target environment doesn't seem to exist..."; \ + if [ "$(AUTO_INSTALL)" = "true" ]; then \ + ans="y";\ + else \ + echo ""; \ + echo -n "Do you want to create it? 
[y/N] "; \ + read ans; \ + fi; \ + case $$ans in \ + [Yy]*) \ + echo "Creating conda environment : [$(CONDA_ENVIRONMENT)]"; \ + make -s conda-create-env; \ + ;; \ + *) \ + echo "Exiting..."; \ + exit 1;\ + ;; \ + esac;\ + fi; + $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) $(CONDA_TOOL) install $(CONDA_YES_OPTION) -c conda-forge poetry; \ + CURRENT_VERSION=$$($(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) poetry --version | awk '{print $$NF}' | tr -d ')'); \ + REQUIRED_VERSION="1.6.0"; \ + if [ "$$(printf '%s\n' "$$REQUIRED_VERSION" "$$CURRENT_VERSION" | sort -V | head -n1)" != "$$REQUIRED_VERSION" ]; then \ + echo "Poetry installed version $$CURRENT_VERSION is less than minimal version $$REQUIRED_VERSION, fixing urllib3 version to prevent problems"; \ + $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) poetry add "urllib3<2.0.0"; \ + fi; + +.PHONY:conda-poetry-install +conda-poetry-install: ## Install Poetry in the project's Conda environment. Will fail if Conda is not found + @poetry --version; \ + if [ $$? != "0" ]; then \ + echo "Poetry not found, proceeding to install Poetry..."; \ + echo "Looking for [$(CONDA_TOOL)]...";\ + $(CONDA_TOOL) --version; \ + if [ $$? != "0" ]; then \ + echo "$(CONDA_TOOL) not found; Poetry will not be installed"; \ + else \ + echo "Installing Poetry with Conda in [$(CONDA_ENVIRONMENT)] environment"; \ + make -s _conda-poetry-install; \ + fi; \ + else \ + echo ""; \ + echo "Poetry has been found on this system :"; \ + echo " Install location: $$(which poetry)"; \ + echo ""; \ + if [ "$(AUTO_INSTALL)" = "true" ]; then \ + ans="y";\ + else \ + echo -n "Would you like to install poetry in the project's conda environment anyway ? 
[y/N]: "; \ + read ans; \ + fi; \ + case $$ans in \ + [Yy]*) \ + echo "Installing Poetry with Conda in [$(CONDA_ENVIRONMENT)] environment"; \ + make -s _conda-poetry-install; \ + ;; \ + *) \ + echo "Skipping installation."; \ + echo " "; \ + ;; \ + esac; \ + fi; + +.PHONY: conda-poetry-uninstall +conda-poetry-uninstall: ## Uninstall Poetry located in currently active Conda environment + $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) $(CONDA_TOOL) remove $(CONDA_YES_OPTION) poetry + +.PHONY: conda-clean-env +conda-clean-env: ## Completely removes local project's Conda environment + $(CONDA_TOOL) env remove $(CONDA_YES_OPTION) -n $(CONDA_ENVIRONMENT) + +## -- Install targets (All install targets will install Poetry if not found using 'make poetry-install-auto')-------- ## + +POETRY_COMMAND := poetry + +ifeq ($(DEFAULT_INSTALL_ENV),venv) +POETRY_COMMAND := source $(VENV_ACTIVATE) && poetry +else ifeq ($(DEFAULT_INSTALL_ENV),poetry) +POETRY_COMMAND := poetry +else ifeq ($(DEFAULT_INSTALL_ENV),conda) +POETRY_COMMAND := $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) poetry +endif + +.PHONY: _check-env +_check-env: + @if ! [ $(DEFAULT_INSTALL_ENV) ]; then \ + echo -e "\e[1;39;41m-- WARNING --\e[0m No installation environment have been defined." ; \ + echo "" ; \ + echo "Defaulting to Poetry managed environment - Poetry will either use activated environment, or '.venv'," ; \ + echo "if found, or create and manage it's own environment if not." ; \ + elif [ $(DEFAULT_INSTALL_ENV) = "venv" ]; then \ + if [ ! -f $(VENV_ACTIVATE) ]; then \ + make -s venv-create ;\ + fi; \ + elif [ $(DEFAULT_INSTALL_ENV) = "conda" ]; then \ + if ! $(CONDA_TOOL) env list | grep -q $(CONDA_ENVIRONMENT) ; then \ + make -s conda-create-env ; \ + fi; \ + fi; + +.PHONY: _remind-env-activate +_remind-env-activate: + @echo "" + @echo "Activate your environment using the following command:" + @echo "" + @if ! 
[ $(DEFAULT_INSTALL_ENV) ] || [ $(DEFAULT_INSTALL_ENV) = "poetry" ]; then \ + make -s poetry-activate ; \ + echo "" ; \ + echo "You can also use the eval bash command : eval \$$(make poetry-activate)"; \ + echo "" ; \ + echo "The environment can also be used through the 'poetry run ' command."; \ + echo "" ; \ + echo " Ex: poetry run python "; \ + elif [ $(DEFAULT_INSTALL_ENV) = "venv" ]; then \ + make -s venv-activate ; \ + echo "" ; \ + echo "You can also use the eval bash command : eval \$$(make venv-activate)"; \ + elif [ $(DEFAULT_INSTALL_ENV) = "conda" ]; then \ + make -s conda-activate ; \ + echo "" ; \ + echo "You can also use the eval bash command : eval \$$(make conda-activate)"; \ + fi; + @echo "" + +test-echo: + @echo "use the eval bash command : eval \$$(make poetry-activate)" .PHONY: install install: install-precommit ## Install the application package, developer dependencies and pre-commit hook .PHONY: install-precommit -install-precommit: install-dev## Install the pre-commit hooks (also installs developer dependencies) +install-precommit: install-dev ## Install the pre-commit hooks (also installs developer dependencies) @if [ -f .git/hooks/pre-commit ]; then \ echo "Pre-commit hook found"; \ else \ echo "Pre-commit hook not found, proceeding to configure it"; \ - poetry run pre-commit install; \ + $(POETRY_COMMAND) run pre-commit install; \ fi; .PHONY: install-dev -install-dev: poetry-install-auto ## Install the application along with developer dependencies - @poetry install --with dev +install-dev: poetry-install-auto _check-env ## Install the application along with developer dependencies + @$(POETRY_COMMAND) install --with dev + @make -s _remind-env-activate .PHONY: install-with-lab -install-with-lab: poetry-install-auto ## Install the application and it's dev dependencies, including Jupyter Lab - @poetry install --with dev --with lab +install-with-lab: poetry-install-auto _check-env ## Install the application and it's dev dependencies,
including Jupyter Lab + @$(POETRY_COMMAND) install --with dev --with lab + @make -s _remind-env-activate .PHONY: install-package -install-package: poetry-install-auto ## Install the application package only - @poetry install +install-package: poetry-install-auto _check-env ## Install the application package only + @$(POETRY_COMMAND) install + @make -s _remind-env-activate ## -- Versioning targets -------------------------------------------------------------------------------------------- ## @@ -381,6 +597,10 @@ ifeq ($(filter dry, $(MAKECMDGOALS)), dry) BUMP_ARGS := $(BUMP_ARGS) --dry-run --allow-dirty endif +.PHONY: dry +dry: ## Add the dry target for a preview of changes; ex. 'make bump-major dry' + @-echo > /dev/null + .PHONY: bump-major bump-major: ## Bump application major version $(BUMP_TOOL) $(BUMP_ARGS) bump major @@ -404,9 +624,13 @@ check-lint: ## Check code linting (black, isort, flake8, docformatter and pylint poetry run nox -s check .PHONY: check-pylint -check-pylint: ## Check code linting with pylint +check-pylint: ## Check code with pylint poetry run nox -s pylint +.PHONY: check-complexity +check-complexity: ## Check code cyclomatic complexity with Flake8-McCabe + poetry run nox -s complexity + .PHONY: fix-lint fix-lint: ## Fix code linting (black, isort, flynt, docformatter) poetry run nox -s fix @@ -415,7 +639,6 @@ fix-lint: ## Fix code linting (black, isort, flynt, docformatter) precommit: ## Run Pre-commit on all files manually poetry run nox -s precommit - ## -- Tests targets ------------------------------------------------------------------------------------------------- ## .PHONY: test diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5d4254c..8e0565d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,8 +17,18 @@ repos: - id: check-added-large-files args: ["--maxkb=5000"] + - repo: https://github.com/PyCQA/autoflake + rev: v2.3.1 + hooks: + - id: autoflake + + - repo: https://github.com/hhatto/autopep8 
+ rev: v2.3.2 + hooks: + - id: autopep8 + - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.4.2 hooks: - id: black diff --git a/Makefile.private.example b/Makefile.private.example index be83cd0..c65e8d5 100644 --- a/Makefile.private.example +++ b/Makefile.private.example @@ -24,5 +24,15 @@ DOCKER_COMPOSE := docker compose # a 'true' value will automatically install/remove without asking beforehand. AUTO_INSTALL := false +# The default environment to use. The choices are as follows: [venv, poetry, conda] +# If this is not set, the makefile will use the `poetry` command without activating +# an environment beforehand. +# DEFAULT_INSTALL_ENV := conda + +# The default environment where Poetry will be installed. The choices are as follows: [venv, conda] +# If this is not set, the makefile will ask the user where they want to install Poetry +#DEFAULT_POETRY_INSTALL_ENV := venv + + ## -- Private targets ------------------------------------------------------------------------------------------------## diff --git a/climateset/download/downloader_config.py b/climateset/download/downloader_config.py index 0926b3d..53fa2b8 100644 --- a/climateset/download/downloader_config.py +++ b/climateset/download/downloader_config.py @@ -157,8 +157,8 @@ def __init__( self.target_mip = self.proj_constants.TARGET_MIP # Attributes that are going to be retrieved / set within this class for - ## (all) - ## (climate model inputs) + # (all) + # (climate model inputs) self.biomass_vars: list[str] = [] self.meta_vars_percentage: list[str] = [] self.meta_vars_share: list[str] = [] diff --git a/noxfile.py b/noxfile.py index 42460cb..c55478b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,22 +1,28 @@ +import re from pathlib import Path import nox +ARG_RE = re.compile(r"^-[-\w=]+$") # e.g.
"-k", "--maxfail=1" (option-style tokens only; bare values or paths such as "tests/foo.py" do not match) + nox.options.reuse_existing_virtualenvs = True # Reuse virtual environments nox.options.sessions = ["precommit"] def get_paths(session): package_path = Path(session.bin).parent.parent.parent + main_package = package_path / "climateset" + tests = package_path / "tests" + scripts = package_path / "scripts" return { "all": [ - package_path / "climateset", - package_path / "tests", - package_path / "scripts", + main_package, + tests, + scripts, ], "module": [ - package_path / "climateset", - package_path / "scripts", + main_package, + scripts, ], } @@ -38,6 +44,12 @@ def flake8(session): session.run("poetry", "run", "flake8", *paths["all"], external=True) +@nox.session() +def complexity(session): + paths = get_paths(session) + session.run("poetry", "run", "flake8", "--max-complexity", "7", *paths["all"], external=True) + + @nox.session() def docformatter(session): paths = get_paths(session) @@ -74,6 +86,8 @@ def check(session): @nox.session() def fix(session): paths = get_paths(session) + session.run("poetry", "run", "autoflake", "-v", *paths["all"], external=True) + session.run("poetry", "run", "autopep8", *paths["all"], external=True) session.run("poetry", "run", "black", *paths["all"], external=True) session.run("poetry", "run", "isort", *paths["all"], external=True) session.run("poetry", "run", "flynt", *paths["all"], external=True) @@ -94,6 +108,18 @@ def precommit(session): session.run("poetry", "run", "pre-commit", "run", "--all-files", external=True) +@nox.session() +def autoflake(session): + paths = get_paths(session) + session.run("poetry", "run", "autoflake", "-v", *paths["all"], external=True) + + +@nox.session() +def autopep(session): + paths = get_paths(session) + session.run("poetry", "run", "autopep8", *paths["all"], external=True) + + @nox.session() def black(session): paths = get_paths(session) @@ -112,6 +138,24 @@ def flynt(session): 
session.run("poetry", "run", "flynt", *paths["all"], external=True)
+@nox.session(name="ruff-lint") +def ruff_lint(session): + paths = get_paths(session) + session.run("poetry", "run", "ruff", "check", *paths["all"], external=True) + + +@nox.session(name="ruff-fix") +def ruff_fix(session): + paths = get_paths(session) + session.run("poetry", "run", "ruff", "check", "--fix", *paths["all"], external=True) + + +@nox.session(name="ruff-format") +def ruff_format(session): + paths = get_paths(session) + session.run("poetry", "run", "ruff", "format", *paths["all"], external=True) + + @nox.session() def test(session): session.run("poetry", "run", "pytest", external=True) @@ -119,8 +163,12 @@ def test(session): @nox.session() def test_custom(session): + for a in session.posargs: + if not ARG_RE.match(a): + session.error(f"unsafe pytest argument detected: {a!r}") + session.run( - "poetry", "run", "pytest", external=True, *session.posargs + "poetry", "run", "python", "-m", "pytest", external=True, *session.posargs ) # Pass additional arguments directly to pytest diff --git a/poetry.lock b/poetry.lock index bf4e591..99a465e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -210,6 +210,36 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +[[package]] +name = "autoflake" +version = "2.3.1" +description = "Removes unused imports and unused variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "autoflake-2.3.1-py3-none-any.whl", hash = "sha256:3ae7495db9084b7b32818b4140e6dc4fc280b712fb414f5b8fe57b0a8e85a840"}, + {file = "autoflake-2.3.1.tar.gz", hash = "sha256:c98b75dc5b0a86459c4f01a1d32ac7eb4338ec4317a4469515ff1e687ecd909e"}, +] + +[package.dependencies] +pyflakes = ">=3.0.0" +tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} + +[[package]] +name = "autopep8" +version = "2.3.2" +description = "A tool that automatically formats Python 
code to conform to the PEP 8 style guide" +optional = false +python-versions = ">=3.9" +files = [ + {file = "autopep8-2.3.2-py2.py3-none-any.whl", hash = "sha256:ce8ad498672c845a0c3de2629c15b635ec2b05ef8177a6e7c91c74f3e9b51128"}, + {file = "autopep8-2.3.2.tar.gz", hash = "sha256:89440a4f969197b69a995e4ce0661b031f455a9f776d2c5ba3dbd83466931758"}, +] + +[package.dependencies] +pycodestyle = ">=2.12.0" +tomli = {version = "*", markers = "python_version < \"3.11\""} + [[package]] name = "babel" version = "2.14.0" @@ -925,18 +955,18 @@ typing = ["typing-extensions (>=4.8)"] [[package]] name = "flake8" -version = "7.0.0" +version = "7.1.2" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" files = [ - {file = "flake8-7.0.0-py2.py3-none-any.whl", hash = "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3"}, - {file = "flake8-7.0.0.tar.gz", hash = "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132"}, + {file = "flake8-7.1.2-py2.py3-none-any.whl", hash = "sha256:1cbc62e65536f65e6d754dfe6f1bada7f5cf392d6f5db3c2b85892466c3e7c1a"}, + {file = "flake8-7.1.2.tar.gz", hash = "sha256:c586ffd0b41540951ae41af572e6790dbd49fc12b3aa2541685d253d9bd504bd"}, ] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.11.0,<2.12.0" +pycodestyle = ">=2.12.0,<2.13.0" pyflakes = ">=3.2.0,<3.3.0" [[package]] @@ -2448,13 +2478,13 @@ tests = ["pytest"] [[package]] name = "pycodestyle" -version = "2.11.1" +version = "2.12.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" files = [ - {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, - {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, + {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = 
"sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"}, + {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"}, ] [[package]] @@ -3175,6 +3205,33 @@ files = [ {file = "rpds_py-0.18.0.tar.gz", hash = "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d"}, ] +[[package]] +name = "ruff" +version = "0.11.11" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.11.11-py3-none-linux_armv6l.whl", hash = "sha256:9924e5ae54125ed8958a4f7de320dab7380f6e9fa3195e3dc3b137c6842a0092"}, + {file = "ruff-0.11.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c8a93276393d91e952f790148eb226658dd275cddfde96c6ca304873f11d2ae4"}, + {file = "ruff-0.11.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6e333dbe2e6ae84cdedefa943dfd6434753ad321764fd937eef9d6b62022bcd"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7885d9a5e4c77b24e8c88aba8c80be9255fa22ab326019dac2356cff42089fc6"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b5ab797fcc09121ed82e9b12b6f27e34859e4227080a42d090881be888755d4"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e231ff3132c1119ece836487a02785f099a43992b95c2f62847d29bace3c75ac"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a97c9babe1d4081037a90289986925726b802d180cca784ac8da2bbbc335f709"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8c4ddcbe8a19f59f57fd814b8b117d4fcea9bee7c0492e6cf5fdc22cfa563c8"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6224076c344a7694c6fbbb70d4f2a7b730f6d47d2a9dc1e7f9d9bb583faf390b"}, + {file = 
"ruff-0.11.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:882821fcdf7ae8db7a951df1903d9cb032bbe838852e5fc3c2b6c3ab54e39875"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:dcec2d50756463d9df075a26a85a6affbc1b0148873da3997286caf1ce03cae1"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99c28505ecbaeb6594701a74e395b187ee083ee26478c1a795d35084d53ebd81"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9263f9e5aa4ff1dec765e99810f1cc53f0c868c5329b69f13845f699fe74f639"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:64ac6f885e3ecb2fdbb71de2701d4e34526651f1e8503af8fb30d4915a3fe345"}, + {file = "ruff-0.11.11-py3-none-win32.whl", hash = "sha256:1adcb9a18802268aaa891ffb67b1c94cd70578f126637118e8099b8e4adcf112"}, + {file = "ruff-0.11.11-py3-none-win_amd64.whl", hash = "sha256:748b4bb245f11e91a04a4ff0f96e386711df0a30412b9fe0c74d5bdc0e4a531f"}, + {file = "ruff-0.11.11-py3-none-win_arm64.whl", hash = "sha256:6c51f136c0364ab1b774767aa8b86331bd8e9d414e2d107db7a2189f35ea1f7b"}, + {file = "ruff-0.11.11.tar.gz", hash = "sha256:7774173cc7c1980e6bf67569ebb7085989a78a103922fb83ef3dfe230cd0687d"}, +] + [[package]] name = "scipy" version = "1.13.1" @@ -3838,4 +3895,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "f022cd016ae910e8f019261294ca552fd34ebbc217b804d9be4aa5fe24d1446d" +content-hash = "df5d5315e96bcaea935280b93b4373244fdfbfe27df154f149483351d1d3642e" diff --git a/pyproject.toml b/pyproject.toml index 66f0c2f..9db4563 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,9 @@ flake8-pyproject = "^1.2.3" black = "^24.4.2" nox = "^2024.4.15" docformatter = {extras = ["tomli"], version = "^1.7.5"} +autoflake = "^2.3.1" +autopep8 = "^2.3.2" +ruff = "^0.11.11" [tool.poetry.group.lab.dependencies] jupyterlab = "^4.0.10" @@ 
-98,21 +101,17 @@ replace = ''' ''' [tool.pylint] -disable = "C0114,C0115,C0116,R0903,R1710,W1203,W0511,W0718,C0302" +disable = "C0114,C0115,C0116,R0903,R1710,W1203" max-line-length = 120 max-locals = 20 max-args = 16 max-attributes = 20 -ignore = [ - ".git", - "migrations", - "__pycache__" -] [tool.flake8] max-line-length = 120 ignore = ["E203", "E266", "E501", "W503"] -max-complexity = 20 +docstring-convention = "numpy" +max-complexity = 18 per-file-ignores = [] exclude = [ ".git", @@ -120,6 +119,20 @@ exclude = [ "__pycache__" ] +[tool.autoflake] +remove-all-unused-imports = true +in-place = true +ignore-init-module-imports = true +remove-unused-variables = true +recursive = true + +[tool.autopep8] +max_line_length = 120 +in-place = true +aggressive = 2 +ignore = ["W503", "E203", "E501"] +recursive = true + [tool.black] line-length = 120 target-version = ["py311"] @@ -132,17 +145,72 @@ line-length = 120 transform-concats = true verbose = true -[tool.pytest.ini_options] -testpaths = [ - "tests", -] -markers = ["offline: mark a test as needing to be run offline.", ] - [tool.docformatter] -style = "google" +style = "numpy" pre-summary-newline = true wrap-descriptions = 120 wrap-summaries = 120 blank = false exclude = [] recursive = true + +[tool.ruff] +line-length = 120 + +target-version = "py311" + +exclude = [ + ".git", + "migrations", + "__pycache__", +] + +[tool.ruff.lint] +select = [ + "A", # Flake8 Built ins + "E", # Error (Flake8) + "F", # Pyflakes (Flake8) + "W", # Warning (Flake8) + "I", # isort (import sorting) + "N", # Naming conventions (Pylint, etc.) 
+ "C90", # mccabe complexity (replaces flake8 --max-complexity and mccabe) + "B", # Bugbear (common linting issues) + "UP", # pyupgrade (suggests modern Python syntax) + "PLR", # Pylint refactor + "PLE", # Pylint error (specific Pylint error rules) + "PLW", # Pylint warning (specific Pylint warning rules) + "PLC", # Pylint convention (specific Pylint convention rules) + "R", # Refactor (Pylint refactoring suggestions) + "TID", # Tidy imports (flake8-tidy-imports) + "FAST",# FastAPI + "C4", # List and dict comprehensions + "DJ", # Django + "PIE", # Returns and unnecessary returns + "Q", # Double quotes + "RET", # Fix return statements + "PTH", # Enforce pathlib + "ARG", # Unused argument + "FLY", # Flynt + "NPY", # Numpy specific + "PD", # Pandas specific + "RUF", # Ruff specific +] + +ignore = [ + "E203", # whitespace before ':', Black already handles this + "E266", # too many leading '#' for comments + "E501", # line too long (we enforce via line-length instead) + "RET504", + "RUF013", + "PTH123" +] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" # Corresponds to flake8's docstring-convention and docformatter style. 
+ +[tool.ruff.lint.pylint] +max-args = 16 + +[tool.ruff.lint.mccabe] +# cyclomatic complexity +max-complexity = 18 From 73b56a287c8e13f5f2284da030ad16b0769ebec2 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Fri, 23 May 2025 17:21:19 -0400 Subject: [PATCH 28/38] Ruff fix lint + formatting --- climateset/download/cmip6_downloader.py | 4 ++-- climateset/download/downloader.py | 3 +-- climateset/download/downloader_config.py | 7 +++---- climateset/download/input4mips_downloader.py | 2 +- climateset/download/utils.py | 14 ++++++-------- climateset/utils.py | 5 ++--- 6 files changed, 15 insertions(+), 20 deletions(-) diff --git a/climateset/download/cmip6_downloader.py b/climateset/download/cmip6_downloader.py index d4a77da..f22b630 100644 --- a/climateset/download/cmip6_downloader.py +++ b/climateset/download/cmip6_downloader.py @@ -48,7 +48,7 @@ def download(self): model=model, project=self.config.project, variable=variable, experiment=experiment ) - def download_from_model_single_var( # noqa: C901 + def download_from_model_single_var( self, model: str, variable: str, @@ -122,7 +122,7 @@ def download_from_model_single_var( # noqa: C901 if len(ensemble_member_final_list) == 0: self.logger.info("WARNING: no overlap between available and desired ensemble members!") self.logger.info("Skipping.") - return None + return for ensemble_member in ensemble_member_final_list: self.logger.info(f"Ensembles member: {ensemble_member}") diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index c0dbf67..0d965ac 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -1,6 +1,5 @@ import logging import pathlib -from typing import Union from climateset.download.cmip6_downloader import cmip6_download_from_config from climateset.download.constants.esgf import CMIP6, INPUT4MIPS @@ -12,7 +11,7 @@ LOGGER = create_logger(__name__) -def download_from_config_file(config_file: Union[str, pathlib.Path], logger: logging.Logger = LOGGER): 
+def download_from_config_file(config_file: str | pathlib.Path, logger: logging.Logger = LOGGER): """ This function downloads variables automatically from input config file Args: diff --git a/climateset/download/downloader_config.py b/climateset/download/downloader_config.py index 53fa2b8..c24ef29 100644 --- a/climateset/download/downloader_config.py +++ b/climateset/download/downloader_config.py @@ -3,7 +3,6 @@ import logging from abc import ABC from pathlib import Path -from typing import Union import yaml @@ -26,7 +25,7 @@ class AbstractDownloaderConfig(ABC): def __init__( self, project: str, - data_dir: Union[str, Path] = RAW_DATA, + data_dir: str | Path = RAW_DATA, experiments: list[str] = None, variables: list[str] = None, overwrite: bool = False, @@ -114,13 +113,13 @@ def generate_config_dict(self): config_dict[self.project][key] = value return config_dict - def generate_config_file(self, config_file_name: str, config_path: Union[str, Path] = CONFIGS) -> None: + def generate_config_file(self, config_file_name: str, config_path: str | Path = CONFIGS) -> None: config_full_path = self._handle_yaml_config_path(config_file_name, config_path) data = self.generate_config_dict() with open(config_full_path, "w", encoding="utf-8") as config_file: yaml.dump(data, config_file, indent=2) - def add_to_config_file(self, config_file_name: str, config_path: Union[str, Path] = CONFIGS) -> None: + def add_to_config_file(self, config_file_name: str, config_path: str | Path = CONFIGS) -> None: config_full_path = self._handle_yaml_config_path(config_file_name, config_path) existing_config = {} if config_full_path.exists(): diff --git a/climateset/download/input4mips_downloader.py b/climateset/download/input4mips_downloader.py index c58e7b2..cbba079 100644 --- a/climateset/download/input4mips_downloader.py +++ b/climateset/download/input4mips_downloader.py @@ -45,7 +45,7 @@ def download(self): self.logger.info(f"Downloading meta openburning share data for variable: {variable}") 
self.download_raw_input_single_var(variable=variable, institution_id="IAMC") - def download_raw_input_single_var( # noqa: C901 + def download_raw_input_single_var( self, variable: str, project: str = INPUT4MIPS, diff --git a/climateset/download/utils.py b/climateset/download/utils.py index 7581a69..92cacef 100644 --- a/climateset/download/utils.py +++ b/climateset/download/utils.py @@ -3,7 +3,6 @@ import re import subprocess import time -from typing import Union import xarray as xr @@ -30,7 +29,8 @@ def extract_target_mip_exp_name(filename: str, target_mip: str, logger: logging. if "covid" in filename: experiment = f"{experiment}_covid" elif target_mip == "CMIP": - if int(year_end) > 2015: + cutoff_year_for_historical = 2015 + if int(year_end) > cutoff_year_for_historical: logger.info(f"TARGET MIP : {filename}") experiment = f"ssp{filename.split('ssp')[-1][:3]}" else: @@ -158,7 +158,7 @@ def _download_process(temp_download_path, search_results, logger: logging.Logger def download_raw_input_variable( - project, institution_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA + project, institution_id, search_results, variable, base_path: str | pathlib.Path = RAW_DATA ): if isinstance(base_path, str): base_path = pathlib.Path(base_path) @@ -166,9 +166,7 @@ def download_raw_input_variable( _download_process(temp_download_path, search_results) -def download_model_variable( - project, model_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA -): +def download_model_variable(project, model_id, search_results, variable, base_path: str | pathlib.Path = RAW_DATA): if isinstance(base_path, str): base_path = pathlib.Path(base_path) temp_download_path = base_path / f"{project}/{model_id}/{variable}" @@ -176,7 +174,7 @@ def download_model_variable( def download_metadata_variable( - project, institution_id, search_results, variable, base_path: Union[str, pathlib.Path] = RAW_DATA + project, institution_id, search_results, variable, 
base_path: str | pathlib.Path = RAW_DATA ): if isinstance(base_path, str): base_path = pathlib.Path(base_path) @@ -266,7 +264,7 @@ def handle_yaml_config_path(config_file_name, config_path): return config_full_path -def match_key_in_list(input_key: str, key_list: list[str]) -> Union[str, None]: +def match_key_in_list(input_key: str, key_list: list[str]) -> str | None: for key in key_list: if input_key.lower() == key.lower(): return key diff --git a/climateset/utils.py b/climateset/utils.py index d93464b..720a8b1 100644 --- a/climateset/utils.py +++ b/climateset/utils.py @@ -1,7 +1,6 @@ import logging import pathlib import sys -from typing import Union import yaml @@ -56,7 +55,7 @@ def get_mip(experiment: str): return "CMIP" -def get_yaml_config(yaml_config_file: Union[str, pathlib.Path], logger: logging.Logger = LOGGER) -> dict: +def get_yaml_config(yaml_config_file: str | pathlib.Path, logger: logging.Logger = LOGGER) -> dict: """ Reads a YAML configuration file and returns its contents as a dictionary. @@ -95,7 +94,7 @@ def get_yaml_config(yaml_config_file: Union[str, pathlib.Path], logger: logging. for path in potential_paths: if path.exists(): config_filepath = path - logger.info(f"Yaml config file [{str(path)}] found.") + logger.info(f"Yaml config file [{path!s}] found.") break params = {} From 4f0283bcf2ca97ca4be1837de47333fc81945cf5 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Fri, 23 May 2025 17:21:46 -0400 Subject: [PATCH 29/38] Update and fix failing test --- tests/test_download/test_downloader.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/test_download/test_downloader.py b/tests/test_download/test_downloader.py index 0fd6374..4debd59 100644 --- a/tests/test_download/test_downloader.py +++ b/tests/test_download/test_downloader.py @@ -170,11 +170,13 @@ def _assert_content_is_in_wget_script(mock_call, string_content): # With the provided inputs, there should be only 1 call. 
# We then access the call's arguments. We are interested in # the content of the wget script that is generated, and we - # want to make sure that for there inputs, we get the same files + # want to make sure that for the same inputs, we get the same files call_list = mock_call.call_args_list first_and_only_call = call_list[0] call_arguments = first_and_only_call.args[0] wget_script_content = call_arguments[2] + print(string_content) + print(wget_script_content) assert string_content in wget_script_content @@ -182,13 +184,17 @@ def test_download_raw_input_single_var(input4mips_downloader_object, mock_subpro download_subprocess = mock_subprocess_run input4mips_downloader_object.download_raw_input_single_var(variable="CO2_em_anthro", institution_id="PNNL-JGCRI") + # These are partial file strings. Since we download multiple variables at the same time, it's very complicated + # to specify versions for each without becoming cumbersome. Therefore, this test just looks for the file parts + # that don't change once a new version gets published (which made this test crash and required updating this + # variable). expected_files = [ - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_175001-179912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_180001-184912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_185001-189912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_190001-194912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_195001-199912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-2024-11-25_gn_200001-202212.nc", + "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-", + "_gn_175001-179912.nc", + "_gn_180001-184912.nc", + "_gn_185001-189912.nc", + "_gn_190001-194912.nc", + "_gn_195001-199912.nc", ] download_subprocess.assert_called_once() for f in expected_files: From 0ceafa06c6a3a5ff0915c3561775c040c5205d3b Mon Sep 17 00:00:00 2001 From: f-PLT Date: 
Fri, 23 May 2025 18:10:12 -0400 Subject: [PATCH 30/38] Refactor input4mips constants for safety --- climateset/download/constants/esgf.py | 4 +- climateset/download/constants/input4mips.py | 771 +------------------ configs/downloader/constants/imput4MIPs.yaml | 732 ++++++++++++++++++ 3 files changed, 764 insertions(+), 743 deletions(-) create mode 100644 configs/downloader/constants/imput4MIPs.yaml diff --git a/climateset/download/constants/esgf.py b/climateset/download/constants/esgf.py index ba296c6..14080d6 100644 --- a/climateset/download/constants/esgf.py +++ b/climateset/download/constants/esgf.py @@ -1,6 +1,6 @@ from .cmip6 import Cmip6Constants from .cmip6plus import Cmip6plusConstants -from .input4mips import Input4mipsConstants +from .input4mips import INPUT4MIPS_CONSTANTS CMIP6 = "CMIP6" CMIP6PLUS = "CMIP6Plus" @@ -13,7 +13,7 @@ ESGF_PROJECTS_CONSTANTS = { CMIP6: Cmip6Constants, CMIP6PLUS: Cmip6plusConstants, - INPUT4MIPS: Input4mipsConstants, + INPUT4MIPS: INPUT4MIPS_CONSTANTS, } # datasets that provide inputs to climate models diff --git a/climateset/download/constants/input4mips.py b/climateset/download/constants/input4mips.py index ba78c50..dbdd06e 100644 --- a/climateset/download/constants/input4mips.py +++ b/climateset/download/constants/input4mips.py @@ -1,744 +1,33 @@ # TODO add VAR_SOURCE_LOOKUP with raw variables # TODO add supported experiments # TODO do we really need emission endings, meta_endings_prc, meta_endings_shar?? how is this used so far? 
-class Input4mipsConstants: - """ - Attributes: - NODE_LINK (str): Where the data can be accessed - EMISSION_ENDINGS (List): - META_ENDINGS_PRC (List): - META_ENDINGS_SHARE (List): - VAR_SOURCE_LOOKUP (Dict>): model and raw variables - """ - - NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" - - EMISSIONS_ENDINGS = ["_em_openburning", "_em_anthro", "_em_AIR_anthro"] - - META_ENDINGS_PRC = [ - "_percentage_AGRI", - "_percentage_BORF", - "_percentage_DEFO", - "_percentage_PEAT", - "_percentage_SAVA", - "_percentage_TEMF", - ] - - META_ENDINGS_SHAR = ["_openburning_share"] - - MIP_ERA = "CMIP6" - - TARGET_MIP = "ScenarioMIP" - - SUPPORTED_EXPERIMENTS = [ - "historical", - "ssp119", - "ssp126", - "ssp245", - "ssp370", - "ssp434", - "ssp460", - "ssp534-over", - "ssp585", - ] - - VAR_SOURCE_LOOKUP = [ - "years", - "year_weight", - "year_fr", - "wlenbinsize", - "wlen_bnds", - "wlen", - "wfo", - "wetnoy", - "wetnhx", - "water_vapor", - "vos", - "volume_density", - "vo", - "vmro3", - "vas", - "urban_to_secdn", - "urban_to_secdf", - "urban_to_range", - "urban_to_pastr", - "urban_to_c4per", - "urban_to_c4ann", - "urban_to_c3per", - "urban_to_c3nfx", - "urban_to_c3ann", - "urban", - "uos", - "uo", - "uas", - "tsi", - "ts", - "total_solar_irradiance", - "tosbcs", - "tos", - "thetao", - "theta", - "temp_level", - "temp_layer", - "tauv", - "tauu", - "tas", - "surface_temperature", - "surface_emissivity", - "surface_albedo", - "sst", - "ssn", - "ssi", - "ssa550", - "sos", - "solar_zenith_angle", - "so2f2_SH", - "so2f2_NH", - "so2f2_GM", - "so", - "sithick", - "sig_lon_W", - "sig_lon_E", - "sig_lat_W", - "sig_lat_E", - "siconcbcs", - "siconca", - "siconc", - "sftof", - "sftflf", - "sf6_SH", - "sf6_NH", - "sf6_GM", - "secyf_harv", - "secyf_bioh", - "secnf_harv", - "secnf_bioh", - "secmf_harv", - "secmf_bioh", - "secmb", - "secma", - "secdn_to_urban", - "secdn_to_secdf", - "secdn_to_range", - "secdn_to_pastr", - "secdn_to_c4per", - "secdn_to_c4ann", - "secdn_to_c3per", - 
"secdn_to_c3nfx", - "secdn_to_c3ann", - "secdn", - "secdf_to_urban", - "secdf_to_secdn", - "secdf_to_range", - "secdf_to_pastr", - "secdf_to_c4per", - "secdf_to_c4ann", - "secdf_to_c3per", - "secdf_to_c3nfx", - "secdf_to_c3ann", - "secdf", - "scph", - "scnum", - "sad_of_big_particles", - "sad", - "rsds", - "rndwd", - "rmean", - "rlds", - "range_to_urban", - "range_to_secdn", - "range_to_secdf", - "range_to_pastr", - "range_to_c4per", - "range_to_c4ann", - "range_to_c3per", - "range_to_c3nfx", - "range_to_c3ann", - "range", - "ptbio", - "psl", - "prsn", - "prra", - "profile_weight", - "primn_to_urban", - "primn_to_secdf", - "primn_to_range", - "primn_to_pastr", - "primn_to_c4per", - "primn_to_c4ann", - "primn_to_c3per", - "primn_to_c3nfx", - "primn_to_c3ann", - "primn_harv", - "primn_bioh", - "primn", - "primf_to_urban", - "primf_to_secdn", - "primf_to_range", - "primf_to_pastr", - "primf_to_c4per", - "primf_to_c4ann", - "primf_to_c3per", - "primf_to_c3nfx", - "primf_to_c3ann", - "primf_harv", - "primf_bioh", - "primf", - "pressure", - "pres_level", - "pres_layer", - "pr", - "plume_number", - "plume_lon", - "plume_lat", - "plume_feature", - "percentage_TEMF", - "percentage_SAVA", - "percentage_PEAT", - "percentage_DEFO", - "percentage_BORF", - "percentage_AGRI", - "pastr_to_urban", - "pastr_to_secdn", - "pastr_to_secdf", - "pastr_to_range", - "pastr_to_c4per", - "pastr_to_c4ann", - "pastr_to_c3per", - "pastr_to_c3nfx", - "pastr_to_c3ann", - "pastr", - "ozone", - "oxygen_GM", - "nitrous_oxide_SH", - "nitrous_oxide_NH", - "nitrous_oxide_GM", - "nitrogen_GM", - "nf3_SH", - "nf3_NH", - "nf3_GM", - "mrro", - "month", - "mole_fraction_of_so2f2_in_air", - "mole_fraction_of_sf6_in_air", - "mole_fraction_of_nitrous_oxide_in_air", - "mole_fraction_of_nf3_in_air", - "mole_fraction_of_methyl_chloride_in_air", - "mole_fraction_of_methyl_bromide_in_air", - "mole_fraction_of_methane_in_air", - "mole_fraction_of_hfc4310mee_in_air", - "mole_fraction_of_hfc365mfc_in_air", - 
"mole_fraction_of_hfc32_in_air", - "mole_fraction_of_hfc245fa_in_air", - "mole_fraction_of_hfc23_in_air", - "mole_fraction_of_hfc236fa_in_air", - "mole_fraction_of_hfc227ea_in_air", - "mole_fraction_of_hfc152a_in_air", - "mole_fraction_of_hfc143a_in_air", - "mole_fraction_of_hfc134aeq_in_air", - "mole_fraction_of_hfc134a_in_air", - "mole_fraction_of_hfc125_in_air", - "mole_fraction_of_hcfc22_in_air", - "mole_fraction_of_hcfc142b_in_air", - "mole_fraction_of_hcfc141b_in_air", - "mole_fraction_of_halon2402_in_air", - "mole_fraction_of_halon1301_in_air", - "mole_fraction_of_halon1211_in_air", - "mole_fraction_of_co2eq_in_air", - "mole_fraction_of_chcl3_in_air", - "mole_fraction_of_ch3ccl3_in_air", - "mole_fraction_of_ch2cl2_in_air", - "mole_fraction_of_cfc12eq_in_air", - "mole_fraction_of_cfc12_in_air", - "mole_fraction_of_cfc11eq_in_air", - "mole_fraction_of_cfc11_in_air", - "mole_fraction_of_cfc115_in_air", - "mole_fraction_of_cfc114_in_air", - "mole_fraction_of_cfc113_in_air", - "mole_fraction_of_cf4_in_air", - "mole_fraction_of_carbon_tetrachloride_in_air", - "mole_fraction_of_carbon_dioxide_in_air", - "mole_fraction_of_c_c4f8_in_air", - "mole_fraction_of_c8f18_in_air", - "mole_fraction_of_c7f16_in_air", - "mole_fraction_of_c6f14_in_air", - "mole_fraction_of_c5f12_in_air", - "mole_fraction_of_c4f10_in_air", - "mole_fraction_of_c3f8_in_air", - "mole_fraction_of_c2f6_in_air", - "methyl_chloride_SH", - "methyl_chloride_NH", - "methyl_chloride_GM", - "methyl_bromide_SH", - "methyl_bromide_NH", - "methyl_bromide_GM", - "methane_SH", - "methane_NH", - "methane_GM", - "mask4resto_ipv_Nextrop", - "mask4resto_ipv", - "mask4resto_amv_trop", - "mask4resto_amv_extrop", - "mask4resto_amv", - "lon_bounds", - "licalvf", - "lat_bounds", - "kp", - "is_biomass", - "irrig_c4per", - "irrig_c4ann", - "irrig_c3per", - "irrig_c3nfx", - "irrig_c3ann", - "ipv_index", - "iprp", - "iprm", - "iprg", - "icwtr", - "huss", - "hfds", - "hfc4310mee_SH", - "hfc4310mee_NH", - "hfc4310mee_GM", - 
"hfc365mfc_SH", - "hfc365mfc_NH", - "hfc365mfc_GM", - "hfc32_SH", - "hfc32_NH", - "hfc32_GM", - "hfc245fa_SH", - "hfc245fa_NH", - "hfc245fa_GM", - "hfc23_SH", - "hfc23_NH", - "hfc23_GM", - "hfc236fa_SH", - "hfc236fa_NH", - "hfc236fa_GM", - "hfc227ea_SH", - "hfc227ea_NH", - "hfc227ea_GM", - "hfc152a_SH", - "hfc152a_NH", - "hfc152a_GM", - "hfc143a_SH", - "hfc143a_NH", - "hfc143a_GM", - "hfc134aeq_SH", - "hfc134aeq_NH", - "hfc134aeq_GM", - "hfc134a_SH", - "hfc134a_NH", - "hfc134a_GM", - "hfc125_SH", - "hfc125_NH", - "hfc125_GM", - "hcfc22_SH", - "hcfc22_NH", - "hcfc22_GM", - "hcfc142b_SH", - "hcfc142b_NH", - "hcfc142b_GM", - "hcfc141b_SH", - "hcfc141b_NH", - "hcfc141b_GM", - "halon2402_SH", - "halon2402_NH", - "halon2402_GM", - "halon1301_SH", - "halon1301_NH", - "halon1301_GM", - "halon1211_SH", - "halon1211_NH", - "halon1211_GM", - "gzdis", - "gridcellarea", - "gpbio", - "gldis", - "glat_bnds", - "glat", - "fulwd", - "ftr_weight", - "fstnf", - "friver", - "flood", - "fill_flag", - "fharv_c4per", - "fharv_c3per", - "fertl_c4per", - "fertl_c4ann", - "fertl_c3per", - "fertl_c3nfx", - "fertl_c3ann", - "f107", - "expt_label", - "evspsbl", - "drynoy", - "drynhx", - "delta13co2_in_air", - "datasource", - "crpbf_total", - "crpbf_c4per", - "crpbf_c4ann", - "crpbf_c3per", - "crpbf_c3nfx", - "crpbf_c3ann", - "combf", - "co2eq_SH", - "co2eq_NH", - "co2eq_GM", - "chcl3_SH", - "chcl3_NH", - "chcl3_GM", - "ch3ccl3_SH", - "ch3ccl3_NH", - "ch3ccl3_GM", - "ch2cl2_SH", - "ch2cl2_NH", - "ch2cl2_GM", - "cfc12eq_SH", - "cfc12eq_NH", - "cfc12eq_GM", - "cfc12_SH", - "cfc12_NH", - "cfc12_GM", - "cfc11eq_SH", - "cfc11eq_NH", - "cfc11eq_GM", - "cfc11_SH", - "cfc11_NH", - "cfc11_GM", - "cfc115_SH", - "cfc115_NH", - "cfc115_GM", - "cfc114_SH", - "cfc114_NH", - "cfc114_GM", - "cfc113_SH", - "cfc113_NH", - "cfc113_GM", - "cf4_SH", - "cf4_NH", - "cf4_GM", - "ccode", - "carea", - "carbon_tetrachloride_SH", - "carbon_tetrachloride_NH", - "carbon_tetrachloride_GM", - "carbon_monoxide_GM", - 
"carbon_dioxide_SH", - "carbon_dioxide_NH", - "carbon_dioxide_GM", - "calyear", - "calmonth", - "calday", - "c_c4f8_SH", - "c_c4f8_NH", - "c_c4f8_GM", - "c8f18_SH", - "c8f18_NH", - "c8f18_GM", - "c7f16_SH", - "c7f16_NH", - "c7f16_GM", - "c6f14_SH", - "c6f14_NH", - "c6f14_GM", - "c5f12_SH", - "c5f12_NH", - "c5f12_GM", - "c4per_to_urban", - "c4per_to_secdn", - "c4per_to_secdf", - "c4per_to_range", - "c4per_to_pastr", - "c4per_to_c4ann", - "c4per_to_c3per", - "c4per_to_c3nfx", - "c4per_to_c3ann", - "c4per", - "c4f10_SH", - "c4f10_NH", - "c4f10_GM", - "c4ann_to_urban", - "c4ann_to_secdn", - "c4ann_to_secdf", - "c4ann_to_range", - "c4ann_to_pastr", - "c4ann_to_c4per", - "c4ann_to_c3per", - "c4ann_to_c3nfx", - "c4ann_to_c3ann", - "c4ann", - "c3per_to_urban", - "c3per_to_secdn", - "c3per_to_secdf", - "c3per_to_range", - "c3per_to_pastr", - "c3per_to_c4per", - "c3per_to_c4ann", - "c3per_to_c3nfx", - "c3per_to_c3ann", - "c3per", - "c3nfx_to_urban", - "c3nfx_to_secdn", - "c3nfx_to_secdf", - "c3nfx_to_range", - "c3nfx_to_pastr", - "c3nfx_to_c4per", - "c3nfx_to_c4ann", - "c3nfx_to_c3per", - "c3nfx_to_c3ann", - "c3nfx", - "c3f8_SH", - "c3f8_NH", - "c3f8_GM", - "c3ann_to_urban", - "c3ann_to_secdn", - "c3ann_to_secdf", - "c3ann_to_range", - "c3ann_to_pastr", - "c3ann_to_c4per", - "c3ann_to_c4ann", - "c3ann_to_c3per", - "c3ann_to_c3nfx", - "c3ann", - "c2f6_SH", - "c2f6_NH", - "c2f6_GM", - "bounds_time", - "bounds_sector", - "bounds_latitude", - "bounds_altitude", - "beta_b", - "beta_a", - "asy550", - "asl", - "areacello", - "areacellg", - "areacella", - "ap", - "aod_spmx", - "aod_fmbg", - "ann_cycle", - "angstrom", - "amv_index", - "altitude", - "added_tree_cover", - "acabf", - "WST", - "VOC_openburning_share", - "VOC_em_openburning", - "VOC_em_anthro", - "VOC_em_AIR_anthro", - "VOC25_other_voc_em_speciated_VOC_anthro", - "VOC25_other_voc_em_speciated_VOC", - "VOC25-other_voc_em_speciated_VOC", - "VOC24_acids_em_speciated_VOC_anthro", - "VOC24_acids_em_speciated_VOC", - 
"VOC24-acids_em_speciated_VOC", - "VOC23_ketones_em_speciated_VOC_anthro", - "VOC23_ketones_em_speciated_VOC", - "VOC23-ketones_em_speciated_VOC", - "VOC22_other_alka_em_speciated_VOC_anthro", - "VOC22_other_alka_em_speciated_VOC", - "VOC22-other_alka_em_speciated_VOC", - "VOC21_methanal_em_speciated_VOC_anthro", - "VOC21_methanal_em_speciated_VOC", - "VOC21-methanal_em_speciated_VOC", - "VOC20_chlorinate_em_speciated_VOC_anthro", - "VOC20_chlorinate_em_speciated_VOC", - "VOC20-chlorinate_em_speciated_VOC", - "VOC19_ethers_em_speciated_VOC_anthro", - "VOC19_ethers_em_speciated_VOC", - "VOC19-ethers_em_speciated_VOC", - "VOC18_esters_em_speciated_VOC_anthro", - "VOC18_esters_em_speciated_VOC", - "VOC18-esters_em_speciated_VOC", - "VOC17_other_arom_em_speciated_VOC_anthro", - "VOC17_other_arom_em_speciated_VOC", - "VOC17-other_arom_em_speciated_VOC", - "VOC16_trimethylb_em_speciated_VOC_anthro", - "VOC16_trimethylb_em_speciated_VOC", - "VOC16-trimethylb_em_speciated_VOC", - "VOC15_xylene_em_speciated_VOC_anthro", - "VOC15_xylene_em_speciated_VOC", - "VOC15-xylene_em_speciated_VOC", - "VOC14_toluene_em_speciated_VOC_anthro", - "VOC14_toluene_em_speciated_VOC", - "VOC14-toluene_em_speciated_VOC", - "VOC13_benzene_em_speciated_VOC_anthro", - "VOC13_benzene_em_speciated_VOC", - "VOC13-benzene_em_speciated_VOC", - "VOC12_other_alke_em_speciated_VOC_anthro", - "VOC12_other_alke_em_speciated_VOC", - "VOC12-other_alke_em_speciated_VOC", - "VOC09_ethyne_em_speciated_VOC_anthro", - "VOC09_ethyne_em_speciated_VOC", - "VOC09-ethyne_em_speciated_VOC", - "VOC08_propene_em_speciated_VOC_anthro", - "VOC08_propene_em_speciated_VOC", - "VOC08-propene_em_speciated_VOC", - "VOC07_ethene_em_speciated_VOC_anthro", - "VOC07_ethene_em_speciated_VOC", - "VOC07-ethene_em_speciated_VOC", - "VOC06_hexanes_pl_em_speciated_VOC_anthro", - "VOC06_hexanes_pl_em_speciated_VOC", - "VOC06-hexanes_pl_em_speciated_VOC", - "VOC05_pentanes_em_speciated_VOC_anthro", - "VOC05_pentanes_em_speciated_VOC", - 
"VOC05-pentanes_em_speciated_VOC", - "VOC04_butanes_em_speciated_VOC_anthro", - "VOC04_butanes_em_speciated_VOC", - "VOC04-butanes_em_speciated_VOC", - "VOC03_propane_em_speciated_VOC_anthro", - "VOC03_propane_em_speciated_VOC", - "VOC03-propane_em_speciated_VOC", - "VOC02_ethane_em_speciated_VOC_anthro", - "VOC02_ethane_em_speciated_VOC", - "VOC02-ethane_em_speciated_VOC", - "VOC01_alcohols_em_speciated_VOC_anthro", - "VOC01_alcohols_em_speciated_VOC", - "VOC01-alcohols_em_speciated_VOC", - "Toluene_lump", - "TRA", - "SO2_openburning_share", - "SO2_em_openburning", - "SO2_em_anthro", - "SO2_em_SOLID_BIOFUEL_anthro", - "SO2_em_AIR_anthro", - "SO2", - "SLV", - "SHP", - "RSLossRem", - "RCO", - "OC_openburning_share", - "OC_em_openburning", - "OC_em_anthro", - "OC_em_SOLID_BIOFUEL_anthro", - "OC_em_AIR_anthro", - "OC", - "NOx_openburning_share", - "NOx_em_openburning", - "NOx_em_anthro", - "NOx_em_SOLID_BIOFUEL_anthro", - "NOx_em_AIR_anthro", - "NOx", - "NMVOC_openburning_share", - "NMVOC_em_openburning", - "NMVOC_em_anthro", - "NMVOC_em_SOLID_BIOFUEL_anthro", - "NMVOC_em_AIR_anthro", - "NMVOC_Toluene_lump_speciated_VOC_openburning_share", - "NMVOC_Toluene_lump_em_speciated_VOC_openburning", - "NMVOC_MEK_speciated_VOC_openburning_share", - "NMVOC_MEK_em_speciated_VOC_openburning", - "NMVOC_Higher_Alkenes_speciated_VOC_openburning_share", - "NMVOC_Higher_Alkenes_em_speciated_VOC_openburning", - "NMVOC_Higher_Alkanes_speciated_VOC_openburning_share", - "NMVOC_Higher_Alkanes_em_speciated_VOC_openburning", - "NMVOC_HOCH2CHO_speciated_VOC_openburning_share", - "NMVOC_HOCH2CHO_em_speciated_VOC_openburning", - "NMVOC_HCOOH_speciated_VOC_openburning_share", - "NMVOC_HCOOH_em_speciated_VOC_openburning", - "NMVOC_HCN_speciated_VOC_openburning_share", - "NMVOC_HCN_em_speciated_VOC_openburning", - "NMVOC_CH3OH_speciated_VOC_openburning_share", - "NMVOC_CH3OH_em_speciated_VOC_openburning", - "NMVOC_CH3COOH_speciated_VOC_openburning_share", - 
"NMVOC_CH3COOH_em_speciated_VOC_openburning", - "NMVOC_CH3COCHO_speciated_VOC_openburning_share", - "NMVOC_CH3COCHO_em_speciated_VOC_openburning", - "NMVOC_CH2O_speciated_VOC_openburning_share", - "NMVOC_CH2O_em_speciated_VOC_openburning", - "NMVOC_C8H10_speciated_VOC_openburning_share", - "NMVOC_C8H10_em_speciated_VOC_openburning", - "NMVOC_C7H8_speciated_VOC_openburning_share", - "NMVOC_C7H8_em_speciated_VOC_openburning", - "NMVOC_C6H6_speciated_VOC_openburning_share", - "NMVOC_C6H6_em_speciated_VOC_openburning", - "NMVOC_C5H8_speciated_VOC_openburning_share", - "NMVOC_C5H8_em_speciated_VOC_openburning", - "NMVOC_C3H8_speciated_VOC_openburning_share", - "NMVOC_C3H8_em_speciated_VOC_openburning", - "NMVOC_C3H6_speciated_VOC_openburning_share", - "NMVOC_C3H6_em_speciated_VOC_openburning", - "NMVOC_C3H6O_speciated_VOC_openburning_share", - "NMVOC_C3H6O_em_speciated_VOC_openburning", - "NMVOC_C2H6_speciated_VOC_openburning_share", - "NMVOC_C2H6_em_speciated_VOC_openburning", - "NMVOC_C2H6S_speciated_VOC_openburning_share", - "NMVOC_C2H6S_em_speciated_VOC_openburning", - "NMVOC_C2H5OH_speciated_VOC_openburning_share", - "NMVOC_C2H5OH_em_speciated_VOC_openburning", - "NMVOC_C2H4_speciated_VOC_openburning_share", - "NMVOC_C2H4_em_speciated_VOC_openburning", - "NMVOC_C2H4O_speciated_VOC_openburning_share", - "NMVOC_C2H4O_em_speciated_VOC_openburning", - "NMVOC_C2H2_speciated_VOC_openburning_share", - "NMVOC_C2H2_em_speciated_VOC_openburning", - "NMVOC_C10H16_speciated_VOC_openburning_share", - "NMVOC_C10H16_em_speciated_VOC_openburning", - "NMVOC", - "NH3_openburning_share", - "NH3_em_openburning", - "NH3_em_anthro", - "NH3_em_SOLID_BIOFUEL_anthro", - "NH3_em_AIR_anthro", - "NH3", - "N2O", - "MEK", - "IND", - "Higher_Alkenes", - "Higher_Alkanes", - "HOCH2CHO", - "HCOOH", - "HCN", - "H2_openburning_share", - "H2_em_openburning", - "H2SO4_mass", - "H2", - "ENE", - "Delta14co2_in_air", - "CO_openburning_share", - "CO_em_openburning", - "CO_em_anthro", - 
"CO_em_SOLID_BIOFUEL_anthro", - "CO_em_AIR_anthro", - "CO2_em_anthro", - "CO2_em_AIR_anthro", - "CO2", - "CO", - "CH4_openburning_share", - "CH4_em_openburning", - "CH4_em_anthro", - "CH4_em_SOLID_BIOFUEL_anthro", - "CH4_em_AIR_anthro", - "CH4", - "CH3OH", - "CH3COOH", - "CH3COCHO", - "CH2O", - "C8H10", - "C7H8", - "C6H6", - "C5H8", - "C3H8", - "C3H6O", - "C3H6", - "C2H6S", - "C2H6", - "C2H5OH", - "C2H4O", - "C2H4", - "C2H2", - "C10H16", - "BC_openburning_share", - "BC_em_openburning", - "BC_em_anthro", - "BC_em_SOLID_BIOFUEL_anthro", - "BC_em_AIR_anthro", - "BC", - "AIR", - "AGR", - ] +from dataclasses import dataclass +from typing import Final + +from climateset.utils import get_yaml_config + + +@dataclass(frozen=True) +class Input4MIPSConstants: + NODE_LINK: Final[str] + EMISSIONS_ENDINGS: Final[tuple[str, ...]] + META_ENDINGS_PRC: Final[tuple[str, ...]] + META_ENDINGS_SHAR: Final[tuple[str, ...]] + MIP_ERA: Final[str] + TARGET_MIP: Final[str] + SUPPORTED_EXPERIMENTS: Final[tuple[str, ...]] + VAR_SOURCE_LOOKUP: Final[tuple[str, ...]] + + +_data = get_yaml_config("downloader/constants/imput4MIPs.yaml") + +INPUT4MIPS_CONSTANTS = Input4MIPSConstants( + NODE_LINK=_data["node_link"], + EMISSIONS_ENDINGS=tuple(_data["emissions_endings"]), + META_ENDINGS_PRC=tuple(_data["meta_endings_prc"]), + META_ENDINGS_SHAR=tuple(_data["meta_endings_shar"]), + MIP_ERA=_data["mip_era"], + TARGET_MIP=_data["target_mip"], + SUPPORTED_EXPERIMENTS=tuple(_data["supported_experiments"]), + VAR_SOURCE_LOOKUP=tuple(_data["var_source_lookup"]), +) diff --git a/configs/downloader/constants/imput4MIPs.yaml b/configs/downloader/constants/imput4MIPs.yaml new file mode 100644 index 0000000..cd00b19 --- /dev/null +++ b/configs/downloader/constants/imput4MIPs.yaml @@ -0,0 +1,732 @@ +node_link: "http://esgf-node.llnl.gov/esg-search/" + +emissions_endings: + - "_em_openburning" + - "_em_anthro" + - "_em_AIR_anthro" + +meta_endings_prc: + - "_percentage_AGRI" + - "_percentage_BORF" + - 
"_percentage_DEFO" + - "_percentage_PEAT" + - "_percentage_SAVA" + - "_percentage_TEMF" + +meta_endings_shar: + - "_openburning_share" + +mip_era: "CMIP6" + +target_mip: "ScenarioMIP" + +supported_experiments: + - "historical" + - "ssp119" + - "ssp126" + - "ssp245" + - "ssp370" + - "ssp434" + - "ssp460" + - "ssp534-over" + - "ssp585" + +var_source_lookup: + - "years" + - "year_weight" + - "year_fr" + - "wlenbinsize" + - "wlen_bnds" + - "wlen" + - "wfo" + - "wetnoy" + - "wetnhx" + - "water_vapor" + - "vos" + - "volume_density" + - "vo" + - "vmro3" + - "vas" + - "urban_to_secdn" + - "urban_to_secdf" + - "urban_to_range" + - "urban_to_pastr" + - "urban_to_c4per" + - "urban_to_c4ann" + - "urban_to_c3per" + - "urban_to_c3nfx" + - "urban_to_c3ann" + - "urban" + - "uos" + - "uo" + - "uas" + - "tsi" + - "ts" + - "total_solar_irradiance" + - "tosbcs" + - "tos" + - "thetao" + - "theta" + - "temp_level" + - "temp_layer" + - "tauv" + - "tauu" + - "tas" + - "surface_temperature" + - "surface_emissivity" + - "surface_albedo" + - "sst" + - "ssn" + - "ssi" + - "ssa550" + - "sos" + - "solar_zenith_angle" + - "so2f2_SH" + - "so2f2_NH" + - "so2f2_GM" + - "so" + - "sithick" + - "sig_lon_W" + - "sig_lon_E" + - "sig_lat_W" + - "sig_lat_E" + - "siconcbcs" + - "siconca" + - "siconc" + - "sftof" + - "sftflf" + - "sf6_SH" + - "sf6_NH" + - "sf6_GM" + - "secyf_harv" + - "secyf_bioh" + - "secnf_harv" + - "secnf_bioh" + - "secmf_harv" + - "secmf_bioh" + - "secmb" + - "secma" + - "secdn_to_urban" + - "secdn_to_secdf" + - "secdn_to_range" + - "secdn_to_pastr" + - "secdn_to_c4per" + - "secdn_to_c4ann" + - "secdn_to_c3per" + - "secdn_to_c3nfx" + - "secdn_to_c3ann" + - "secdn" + - "secdf_to_urban" + - "secdf_to_secdn" + - "secdf_to_range" + - "secdf_to_pastr" + - "secdf_to_c4per" + - "secdf_to_c4ann" + - "secdf_to_c3per" + - "secdf_to_c3nfx" + - "secdf_to_c3ann" + - "secdf" + - "scph" + - "scnum" + - "sad_of_big_particles" + - "sad" + - "rsds" + - "rndwd" + - "rmean" + - "rlds" + - "range_to_urban" 
+ - "range_to_secdn" + - "range_to_secdf" + - "range_to_pastr" + - "range_to_c4per" + - "range_to_c4ann" + - "range_to_c3per" + - "range_to_c3nfx" + - "range_to_c3ann" + - "range" + - "ptbio" + - "psl" + - "prsn" + - "prra" + - "profile_weight" + - "primn_to_urban" + - "primn_to_secdf" + - "primn_to_range" + - "primn_to_pastr" + - "primn_to_c4per" + - "primn_to_c4ann" + - "primn_to_c3per" + - "primn_to_c3nfx" + - "primn_to_c3ann" + - "primn_harv" + - "primn_bioh" + - "primn" + - "primf_to_urban" + - "primf_to_secdn" + - "primf_to_range" + - "primf_to_pastr" + - "primf_to_c4per" + - "primf_to_c4ann" + - "primf_to_c3per" + - "primf_to_c3nfx" + - "primf_to_c3ann" + - "primf_harv" + - "primf_bioh" + - "primf" + - "pressure" + - "pres_level" + - "pres_layer" + - "pr" + - "plume_number" + - "plume_lon" + - "plume_lat" + - "plume_feature" + - "percentage_TEMF" + - "percentage_SAVA" + - "percentage_PEAT" + - "percentage_DEFO" + - "percentage_BORF" + - "percentage_AGRI" + - "pastr_to_urban" + - "pastr_to_secdn" + - "pastr_to_secdf" + - "pastr_to_range" + - "pastr_to_c4per" + - "pastr_to_c4ann" + - "pastr_to_c3per" + - "pastr_to_c3nfx" + - "pastr_to_c3ann" + - "pastr" + - "ozone" + - "oxygen_GM" + - "nitrous_oxide_SH" + - "nitrous_oxide_NH" + - "nitrous_oxide_GM" + - "nitrogen_GM" + - "nf3_SH" + - "nf3_NH" + - "nf3_GM" + - "mrro" + - "month" + - "mole_fraction_of_so2f2_in_air" + - "mole_fraction_of_sf6_in_air" + - "mole_fraction_of_nitrous_oxide_in_air" + - "mole_fraction_of_nf3_in_air" + - "mole_fraction_of_methyl_chloride_in_air" + - "mole_fraction_of_methyl_bromide_in_air" + - "mole_fraction_of_methane_in_air" + - "mole_fraction_of_hfc4310mee_in_air" + - "mole_fraction_of_hfc365mfc_in_air" + - "mole_fraction_of_hfc32_in_air" + - "mole_fraction_of_hfc245fa_in_air" + - "mole_fraction_of_hfc23_in_air" + - "mole_fraction_of_hfc236fa_in_air" + - "mole_fraction_of_hfc227ea_in_air" + - "mole_fraction_of_hfc152a_in_air" + - "mole_fraction_of_hfc143a_in_air" + - 
"mole_fraction_of_hfc134aeq_in_air" + - "mole_fraction_of_hfc134a_in_air" + - "mole_fraction_of_hfc125_in_air" + - "mole_fraction_of_hcfc22_in_air" + - "mole_fraction_of_hcfc142b_in_air" + - "mole_fraction_of_hcfc141b_in_air" + - "mole_fraction_of_halon2402_in_air" + - "mole_fraction_of_halon1301_in_air" + - "mole_fraction_of_halon1211_in_air" + - "mole_fraction_of_co2eq_in_air" + - "mole_fraction_of_chcl3_in_air" + - "mole_fraction_of_ch3ccl3_in_air" + - "mole_fraction_of_ch2cl2_in_air" + - "mole_fraction_of_cfc12eq_in_air" + - "mole_fraction_of_cfc12_in_air" + - "mole_fraction_of_cfc11eq_in_air" + - "mole_fraction_of_cfc11_in_air" + - "mole_fraction_of_cfc115_in_air" + - "mole_fraction_of_cfc114_in_air" + - "mole_fraction_of_cfc113_in_air" + - "mole_fraction_of_cf4_in_air" + - "mole_fraction_of_carbon_tetrachloride_in_air" + - "mole_fraction_of_carbon_dioxide_in_air" + - "mole_fraction_of_c_c4f8_in_air" + - "mole_fraction_of_c8f18_in_air" + - "mole_fraction_of_c7f16_in_air" + - "mole_fraction_of_c6f14_in_air" + - "mole_fraction_of_c5f12_in_air" + - "mole_fraction_of_c4f10_in_air" + - "mole_fraction_of_c3f8_in_air" + - "mole_fraction_of_c2f6_in_air" + - "methyl_chloride_SH" + - "methyl_chloride_NH" + - "methyl_chloride_GM" + - "methyl_bromide_SH" + - "methyl_bromide_NH" + - "methyl_bromide_GM" + - "methane_SH" + - "methane_NH" + - "methane_GM" + - "mask4resto_ipv_Nextrop" + - "mask4resto_ipv" + - "mask4resto_amv_trop" + - "mask4resto_amv_extrop" + - "mask4resto_amv" + - "lon_bounds" + - "licalvf" + - "lat_bounds" + - "kp" + - "is_biomass" + - "irrig_c4per" + - "irrig_c4ann" + - "irrig_c3per" + - "irrig_c3nfx" + - "irrig_c3ann" + - "ipv_index" + - "iprp" + - "iprm" + - "iprg" + - "icwtr" + - "huss" + - "hfds" + - "hfc4310mee_SH" + - "hfc4310mee_NH" + - "hfc4310mee_GM" + - "hfc365mfc_SH" + - "hfc365mfc_NH" + - "hfc365mfc_GM" + - "hfc32_SH" + - "hfc32_NH" + - "hfc32_GM" + - "hfc245fa_SH" + - "hfc245fa_NH" + - "hfc245fa_GM" + - "hfc23_SH" + - "hfc23_NH" + - "hfc23_GM" 
+ - "hfc236fa_SH" + - "hfc236fa_NH" + - "hfc236fa_GM" + - "hfc227ea_SH" + - "hfc227ea_NH" + - "hfc227ea_GM" + - "hfc152a_SH" + - "hfc152a_NH" + - "hfc152a_GM" + - "hfc143a_SH" + - "hfc143a_NH" + - "hfc143a_GM" + - "hfc134aeq_SH" + - "hfc134aeq_NH" + - "hfc134aeq_GM" + - "hfc134a_SH" + - "hfc134a_NH" + - "hfc134a_GM" + - "hfc125_SH" + - "hfc125_NH" + - "hfc125_GM" + - "hcfc22_SH" + - "hcfc22_NH" + - "hcfc22_GM" + - "hcfc142b_SH" + - "hcfc142b_NH" + - "hcfc142b_GM" + - "hcfc141b_SH" + - "hcfc141b_NH" + - "hcfc141b_GM" + - "halon2402_SH" + - "halon2402_NH" + - "halon2402_GM" + - "halon1301_SH" + - "halon1301_NH" + - "halon1301_GM" + - "halon1211_SH" + - "halon1211_NH" + - "halon1211_GM" + - "gzdis" + - "gridcellarea" + - "gpbio" + - "gldis" + - "glat_bnds" + - "glat" + - "fulwd" + - "ftr_weight" + - "fstnf" + - "friver" + - "flood" + - "fill_flag" + - "fharv_c4per" + - "fharv_c3per" + - "fertl_c4per" + - "fertl_c4ann" + - "fertl_c3per" + - "fertl_c3nfx" + - "fertl_c3ann" + - "f107" + - "expt_label" + - "evspsbl" + - "drynoy" + - "drynhx" + - "delta13co2_in_air" + - "datasource" + - "crpbf_total" + - "crpbf_c4per" + - "crpbf_c4ann" + - "crpbf_c3per" + - "crpbf_c3nfx" + - "crpbf_c3ann" + - "combf" + - "co2eq_SH" + - "co2eq_NH" + - "co2eq_GM" + - "chcl3_SH" + - "chcl3_NH" + - "chcl3_GM" + - "ch3ccl3_SH" + - "ch3ccl3_NH" + - "ch3ccl3_GM" + - "ch2cl2_SH" + - "ch2cl2_NH" + - "ch2cl2_GM" + - "cfc12eq_SH" + - "cfc12eq_NH" + - "cfc12eq_GM" + - "cfc12_SH" + - "cfc12_NH" + - "cfc12_GM" + - "cfc11eq_SH" + - "cfc11eq_NH" + - "cfc11eq_GM" + - "cfc11_SH" + - "cfc11_NH" + - "cfc11_GM" + - "cfc115_SH" + - "cfc115_NH" + - "cfc115_GM" + - "cfc114_SH" + - "cfc114_NH" + - "cfc114_GM" + - "cfc113_SH" + - "cfc113_NH" + - "cfc113_GM" + - "cf4_SH" + - "cf4_NH" + - "cf4_GM" + - "ccode" + - "carea" + - "carbon_tetrachloride_SH" + - "carbon_tetrachloride_NH" + - "carbon_tetrachloride_GM" + - "carbon_monoxide_GM" + - "carbon_dioxide_SH" + - "carbon_dioxide_NH" + - "carbon_dioxide_GM" + - 
"calyear" + - "calmonth" + - "calday" + - "c_c4f8_SH" + - "c_c4f8_NH" + - "c_c4f8_GM" + - "c8f18_SH" + - "c8f18_NH" + - "c8f18_GM" + - "c7f16_SH" + - "c7f16_NH" + - "c7f16_GM" + - "c6f14_SH" + - "c6f14_NH" + - "c6f14_GM" + - "c5f12_SH" + - "c5f12_NH" + - "c5f12_GM" + - "c4per_to_urban" + - "c4per_to_secdn" + - "c4per_to_secdf" + - "c4per_to_range" + - "c4per_to_pastr" + - "c4per_to_c4ann" + - "c4per_to_c3per" + - "c4per_to_c3nfx" + - "c4per_to_c3ann" + - "c4per" + - "c4f10_SH" + - "c4f10_NH" + - "c4f10_GM" + - "c4ann_to_urban" + - "c4ann_to_secdn" + - "c4ann_to_secdf" + - "c4ann_to_range" + - "c4ann_to_pastr" + - "c4ann_to_c4per" + - "c4ann_to_c3per" + - "c4ann_to_c3nfx" + - "c4ann_to_c3ann" + - "c4ann" + - "c3per_to_urban" + - "c3per_to_secdn" + - "c3per_to_secdf" + - "c3per_to_range" + - "c3per_to_pastr" + - "c3per_to_c4per" + - "c3per_to_c4ann" + - "c3per_to_c3nfx" + - "c3per_to_c3ann" + - "c3per" + - "c3nfx_to_urban" + - "c3nfx_to_secdn" + - "c3nfx_to_secdf" + - "c3nfx_to_range" + - "c3nfx_to_pastr" + - "c3nfx_to_c4per" + - "c3nfx_to_c4ann" + - "c3nfx_to_c3per" + - "c3nfx_to_c3ann" + - "c3nfx" + - "c3f8_SH" + - "c3f8_NH" + - "c3f8_GM" + - "c3ann_to_urban" + - "c3ann_to_secdn" + - "c3ann_to_secdf" + - "c3ann_to_range" + - "c3ann_to_pastr" + - "c3ann_to_c4per" + - "c3ann_to_c4ann" + - "c3ann_to_c3per" + - "c3ann_to_c3nfx" + - "c3ann" + - "c2f6_SH" + - "c2f6_NH" + - "c2f6_GM" + - "bounds_time" + - "bounds_sector" + - "bounds_latitude" + - "bounds_altitude" + - "beta_b" + - "beta_a" + - "asy550" + - "asl" + - "areacello" + - "areacellg" + - "areacella" + - "ap" + - "aod_spmx" + - "aod_fmbg" + - "ann_cycle" + - "angstrom" + - "amv_index" + - "altitude" + - "added_tree_cover" + - "acabf" + - "WST" + - "VOC_openburning_share" + - "VOC_em_openburning" + - "VOC_em_anthro" + - "VOC_em_AIR_anthro" + - "VOC25_other_voc_em_speciated_VOC_anthro" + - "VOC25_other_voc_em_speciated_VOC" + - "VOC25-other_voc_em_speciated_VOC" + - "VOC24_acids_em_speciated_VOC_anthro" + - 
"VOC24_acids_em_speciated_VOC" + - "VOC24-acids_em_speciated_VOC" + - "VOC23_ketones_em_speciated_VOC_anthro" + - "VOC23_ketones_em_speciated_VOC" + - "VOC23-ketones_em_speciated_VOC" + - "VOC22_other_alka_em_speciated_VOC_anthro" + - "VOC22_other_alka_em_speciated_VOC" + - "VOC22-other_alka_em_speciated_VOC" + - "VOC21_methanal_em_speciated_VOC_anthro" + - "VOC21_methanal_em_speciated_VOC" + - "VOC21-methanal_em_speciated_VOC" + - "VOC20_chlorinate_em_speciated_VOC_anthro" + - "VOC20_chlorinate_em_speciated_VOC" + - "VOC20-chlorinate_em_speciated_VOC" + - "VOC19_ethers_em_speciated_VOC_anthro" + - "VOC19_ethers_em_speciated_VOC" + - "VOC19-ethers_em_speciated_VOC" + - "VOC18_esters_em_speciated_VOC_anthro" + - "VOC18_esters_em_speciated_VOC" + - "VOC18-esters_em_speciated_VOC" + - "VOC17_other_arom_em_speciated_VOC_anthro" + - "VOC17_other_arom_em_speciated_VOC" + - "VOC17-other_arom_em_speciated_VOC" + - "VOC16_trimethylb_em_speciated_VOC_anthro" + - "VOC16_trimethylb_em_speciated_VOC" + - "VOC16-trimethylb_em_speciated_VOC" + - "VOC15_xylene_em_speciated_VOC_anthro" + - "VOC15_xylene_em_speciated_VOC" + - "VOC15-xylene_em_speciated_VOC" + - "VOC14_toluene_em_speciated_VOC_anthro" + - "VOC14_toluene_em_speciated_VOC" + - "VOC14-toluene_em_speciated_VOC" + - "VOC13_benzene_em_speciated_VOC_anthro" + - "VOC13_benzene_em_speciated_VOC" + - "VOC13-benzene_em_speciated_VOC" + - "VOC12_other_alke_em_speciated_VOC_anthro" + - "VOC12_other_alke_em_speciated_VOC" + - "VOC12-other_alke_em_speciated_VOC" + - "VOC09_ethyne_em_speciated_VOC_anthro" + - "VOC09_ethyne_em_speciated_VOC" + - "VOC09-ethyne_em_speciated_VOC" + - "VOC08_propene_em_speciated_VOC_anthro" + - "VOC08_propene_em_speciated_VOC" + - "VOC08-propene_em_speciated_VOC" + - "VOC07_ethene_em_speciated_VOC_anthro" + - "VOC07_ethene_em_speciated_VOC" + - "VOC07-ethene_em_speciated_VOC" + - "VOC06_hexanes_pl_em_speciated_VOC_anthro" + - "VOC06_hexanes_pl_em_speciated_VOC" + - "VOC06-hexanes_pl_em_speciated_VOC" + - 
"VOC05_pentanes_em_speciated_VOC_anthro" + - "VOC05_pentanes_em_speciated_VOC" + - "VOC05-pentanes_em_speciated_VOC" + - "VOC04_butanes_em_speciated_VOC_anthro" + - "VOC04_butanes_em_speciated_VOC" + - "VOC04-butanes_em_speciated_VOC" + - "VOC03_propane_em_speciated_VOC_anthro" + - "VOC03_propane_em_speciated_VOC" + - "VOC03-propane_em_speciated_VOC" + - "VOC02_ethane_em_speciated_VOC_anthro" + - "VOC02_ethane_em_speciated_VOC" + - "VOC02-ethane_em_speciated_VOC" + - "VOC01_alcohols_em_speciated_VOC_anthro" + - "VOC01_alcohols_em_speciated_VOC" + - "VOC01-alcohols_em_speciated_VOC" + - "Toluene_lump" + - "TRA" + - "SO2_openburning_share" + - "SO2_em_openburning" + - "SO2_em_anthro" + - "SO2_em_SOLID_BIOFUEL_anthro" + - "SO2_em_AIR_anthro" + - "SO2" + - "SLV" + - "SHP" + - "RSLossRem" + - "RCO" + - "OC_openburning_share" + - "OC_em_openburning" + - "OC_em_anthro" + - "OC_em_SOLID_BIOFUEL_anthro" + - "OC_em_AIR_anthro" + - "OC" + - "NOx_openburning_share" + - "NOx_em_openburning" + - "NOx_em_anthro" + - "NOx_em_SOLID_BIOFUEL_anthro" + - "NOx_em_AIR_anthro" + - "NOx" + - "NMVOC_openburning_share" + - "NMVOC_em_openburning" + - "NMVOC_em_anthro" + - "NMVOC_em_SOLID_BIOFUEL_anthro" + - "NMVOC_em_AIR_anthro" + - "NMVOC_Toluene_lump_speciated_VOC_openburning_share" + - "NMVOC_Toluene_lump_em_speciated_VOC_openburning" + - "NMVOC_MEK_speciated_VOC_openburning_share" + - "NMVOC_MEK_em_speciated_VOC_openburning" + - "NMVOC_Higher_Alkenes_speciated_VOC_openburning_share" + - "NMVOC_Higher_Alkenes_em_speciated_VOC_openburning" + - "NMVOC_Higher_Alkanes_speciated_VOC_openburning_share" + - "NMVOC_Higher_Alkanes_em_speciated_VOC_openburning" + - "NMVOC_HOCH2CHO_speciated_VOC_openburning_share" + - "NMVOC_HOCH2CHO_em_speciated_VOC_openburning" + - "NMVOC_HCOOH_speciated_VOC_openburning_share" + - "NMVOC_HCOOH_em_speciated_VOC_openburning" + - "NMVOC_HCN_speciated_VOC_openburning_share" + - "NMVOC_HCN_em_speciated_VOC_openburning" + - "NMVOC_CH3OH_speciated_VOC_openburning_share" 
+ - "NMVOC_CH3OH_em_speciated_VOC_openburning" + - "NMVOC_CH3COOH_speciated_VOC_openburning_share" + - "NMVOC_CH3COOH_em_speciated_VOC_openburning" + - "NMVOC_CH3COCHO_speciated_VOC_openburning_share" + - "NMVOC_CH3COCHO_em_speciated_VOC_openburning" + - "NMVOC_CH2O_speciated_VOC_openburning_share" + - "NMVOC_CH2O_em_speciated_VOC_openburning" + - "NMVOC_C8H10_speciated_VOC_openburning_share" + - "NMVOC_C8H10_em_speciated_VOC_openburning" + - "NMVOC_C7H8_speciated_VOC_openburning_share" + - "NMVOC_C7H8_em_speciated_VOC_openburning" + - "NMVOC_C6H6_speciated_VOC_openburning_share" + - "NMVOC_C6H6_em_speciated_VOC_openburning" + - "NMVOC_C5H8_speciated_VOC_openburning_share" + - "NMVOC_C5H8_em_speciated_VOC_openburning" + - "NMVOC_C3H8_speciated_VOC_openburning_share" + - "NMVOC_C3H8_em_speciated_VOC_openburning" + - "NMVOC_C3H6_speciated_VOC_openburning_share" + - "NMVOC_C3H6_em_speciated_VOC_openburning" + - "NMVOC_C3H6O_speciated_VOC_openburning_share" + - "NMVOC_C3H6O_em_speciated_VOC_openburning" + - "NMVOC_C2H6_speciated_VOC_openburning_share" + - "NMVOC_C2H6_em_speciated_VOC_openburning" + - "NMVOC_C2H6S_speciated_VOC_openburning_share" + - "NMVOC_C2H6S_em_speciated_VOC_openburning" + - "NMVOC_C2H5OH_speciated_VOC_openburning_share" + - "NMVOC_C2H5OH_em_speciated_VOC_openburning" + - "NMVOC_C2H4_speciated_VOC_openburning_share" + - "NMVOC_C2H4_em_speciated_VOC_openburning" + - "NMVOC_C2H4O_speciated_VOC_openburning_share" + - "NMVOC_C2H4O_em_speciated_VOC_openburning" + - "NMVOC_C2H2_speciated_VOC_openburning_share" + - "NMVOC_C2H2_em_speciated_VOC_openburning" + - "NMVOC_C10H16_speciated_VOC_openburning_share" + - "NMVOC_C10H16_em_speciated_VOC_openburning" + - "NMVOC" + - "NH3_openburning_share" + - "NH3_em_openburning" + - "NH3_em_anthro" + - "NH3_em_SOLID_BIOFUEL_anthro" + - "NH3_em_AIR_anthro" + - "NH3" + - "N2O" + - "MEK" + - "IND" + - "Higher_Alkenes" + - "Higher_Alkanes" + - "HOCH2CHO" + - "HCOOH" + - "HCN" + - "H2_openburning_share" + - 
"H2_em_openburning" + - "H2SO4_mass" + - "H2" + - "ENE" + - "Delta14co2_in_air" + - "CO_openburning_share" + - "CO_em_openburning" + - "CO_em_anthro" + - "CO_em_SOLID_BIOFUEL_anthro" + - "CO_em_AIR_anthro" + - "CO2_em_anthro" + - "CO2_em_AIR_anthro" + - "CO2" + - "CO" + - "CH4_openburning_share" + - "CH4_em_openburning" + - "CH4_em_anthro" + - "CH4_em_SOLID_BIOFUEL_anthro" + - "CH4_em_AIR_anthro" + - "CH4" + - "CH3OH" + - "CH3COOH" + - "CH3COCHO" + - "CH2O" + - "C8H10" + - "C7H8" + - "C6H6" + - "C5H8" + - "C3H8" + - "C3H6O" + - "C3H6" + - "C2H6S" + - "C2H6" + - "C2H5OH" + - "C2H4O" + - "C2H4" + - "C2H2" + - "C10H16" + - "BC_openburning_share" + - "BC_em_openburning" + - "BC_em_anthro" + - "BC_em_SOLID_BIOFUEL_anthro" + - "BC_em_AIR_anthro" + - "BC" + - "AIR" + - "AGR" From 3ed950cf5d43e59dc08aa1b646a234a0db568a6e Mon Sep 17 00:00:00 2001 From: f-PLT Date: Fri, 23 May 2025 18:20:45 -0400 Subject: [PATCH 31/38] Handle pylint warnings --- climateset/download/constants/input4mips.py | 1 + climateset/download/downloader_config.py | 2 +- climateset/download/utils.py | 2 +- pyproject.toml | 58 ++++++++++++--------- 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/climateset/download/constants/input4mips.py b/climateset/download/constants/input4mips.py index dbdd06e..9099191 100644 --- a/climateset/download/constants/input4mips.py +++ b/climateset/download/constants/input4mips.py @@ -1,6 +1,7 @@ # TODO add VAR_SOURCE_LOOKUP with raw variables # TODO add supported experiments # TODO do we really need emission endings, meta_endings_prc, meta_endings_shar?? how is this used so far? 
+# pylint: disable=C0103 from dataclasses import dataclass from typing import Final diff --git a/climateset/download/downloader_config.py b/climateset/download/downloader_config.py index c24ef29..1f19a1b 100644 --- a/climateset/download/downloader_config.py +++ b/climateset/download/downloader_config.py @@ -199,7 +199,7 @@ def _generate_plain_emission_vars(self): for b in self.biomass_vars: try: self.variables.remove(b) - except Exception as error: + except Exception as error: # pylint: disable=W0718 self.logger.warning(f"Caught the following exception but continuing : {error}") self.meta_vars_percentage = [ diff --git a/climateset/download/utils.py b/climateset/download/utils.py index 92cacef..35023b4 100644 --- a/climateset/download/utils.py +++ b/climateset/download/utils.py @@ -93,7 +93,7 @@ def infer_nominal_resolution(ds: xr.Dataset, nominal_resolution: str, logger: lo degree = abs(ds.lon[0].item() - ds.lon[1].item()) nom_res = int(degree * 100) logger.info(f"Inferring nominal resolution: {nom_res}") - except Exception as error: + except Exception as error: # pylint: disable=W0718 logger.warning(f"Caught the following exception but continuing : {error}") return nom_res diff --git a/pyproject.toml b/pyproject.toml index 9db4563..d49a233 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ pre-commit = "^3.7.0" flake8-pyproject = "^1.2.3" black = "^24.4.2" nox = "^2024.4.15" -docformatter = {extras = ["tomli"], version = "^1.7.5"} +docformatter = { extras = ["tomli"], version = "^1.7.5" } autoflake = "^2.3.1" autopep8 = "^2.3.2" ruff = "^0.11.11" @@ -101,7 +101,15 @@ replace = ''' ''' [tool.pylint] -disable = "C0114,C0115,C0116,R0903,R1710,W1203" +disable = [ + "C0114", + "C0115", + "C0116", + "R0903", + "R1710", + "W1203", + "W0511", # TO DO warnings +] max-line-length = 120 max-locals = 20 max-args = 16 @@ -160,49 +168,49 @@ line-length = 120 target-version = "py311" exclude = [ - ".git", - "migrations", - "__pycache__", + ".git", + 
"migrations", + "__pycache__", ] [tool.ruff.lint] select = [ - "A", # Flake8 Built ins - "E", # Error (Flake8) - "F", # Pyflakes (Flake8) - "W", # Warning (Flake8) - "I", # isort (import sorting) - "N", # Naming conventions (Pylint, etc.) + "A", # Flake8 Built ins + "E", # Error (Flake8) + "F", # Pyflakes (Flake8) + "W", # Warning (Flake8) + "I", # isort (import sorting) + "N", # Naming conventions (Pylint, etc.) "C90", # mccabe complexity (replaces flake8 --max-complexity and mccabe) - "B", # Bugbear (common linting issues) - "UP", # pyupgrade (suggests modern Python syntax) + "B", # Bugbear (common linting issues) + "UP", # pyupgrade (suggests modern Python syntax) "PLR", # Pylint refactor "PLE", # Pylint error (specific Pylint error rules) "PLW", # Pylint warning (specific Pylint warning rules) "PLC", # Pylint convention (specific Pylint convention rules) - "R", # Refactor (Pylint refactoring suggestions) + "R", # Refactor (Pylint refactoring suggestions) "TID", # TO DO comments - "FAST",# FastAPI - "C4", # List and dict comprehensions - "DJ", # Django + "FAST", # FastAPI + "C4", # List and dict comprehensions + "DJ", # Django "PIE", # Returns and unecessary returns - "Q", # Double quotes + "Q", # Double quotes "RET", # Fix return statements "PTH", # Enforce pathlib "ARG", # Unused argument "FLY", # Flynt "NPY", # Numpy specific - "PD", # Pandas specific + "PD", # Pandas specific "RUF", # Ruff specific ] ignore = [ - "E203", # whitespace before ':', Black already handles this - "E266", # too many leading '#' for comments - "E501", # line too long (we enforce via line-length instead) - "RET504", - "RUF013", - "PTH123" + "E203", # whitespace before ':', Black already handles this + "E266", # too many leading '#' for comments + "E501", # line too long (we enforce via line-length instead) + "RET504", + "RUF013", + "PTH123" ] [tool.ruff.lint.pydocstyle] From 99561a38eed307f974dc68916552c838b2cbae1b Mon Sep 17 00:00:00 2001 From: f-PLT Date: Fri, 23 May 2025 22:45:17 
-0400 Subject: [PATCH 32/38] Update github actions --- .github/workflows/lint.yml | 5 ++++- .github/workflows/precommit.yml | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 55cd09f..f83e30c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,8 +50,11 @@ jobs: if: steps.cache.outputs.cache-hit != 'true' - name: Install dependencies + env: + CONDA_TOOL: mamba run: | - make CONDA_TOOL=mamba install + make poetry-install-venv + make install - name: Run linting checks run: | diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 8b6a139..b3079ee 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -50,8 +50,11 @@ jobs: if: steps.cache.outputs.cache-hit != 'true' - name: Install dependencies + env: + CONDA_TOOL: mamba run: | - make CONDA_TOOL=mamba install + make poetry-install-venv + make install - name: Run Pre-commit checks run: | From bf53e467ade81b1e683eb08d0d9e356514f88ed5 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Mon, 26 May 2025 19:17:24 -0400 Subject: [PATCH 33/38] Formatting for pyproject.toml --- pyproject.toml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d49a233..36aea85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -175,32 +175,32 @@ exclude = [ [tool.ruff.lint] select = [ - "A", # Flake8 Built ins - "E", # Error (Flake8) - "F", # Pyflakes (Flake8) - "W", # Warning (Flake8) - "I", # isort (import sorting) - "N", # Naming conventions (Pylint, etc.) + "A", # Flake8 Built ins + "E", # Error (Flake8) + "F", # Pyflakes (Flake8) + "W", # Warning (Flake8) + "I", # isort (import sorting) + "N", # Naming conventions (Pylint, etc.) 
"C90", # mccabe complexity (replaces flake8 --max-complexity and mccabe) - "B", # Bugbear (common linting issues) - "UP", # pyupgrade (suggests modern Python syntax) + "B", # Bugbear (common linting issues) + "UP", # pyupgrade (suggests modern Python syntax) "PLR", # Pylint refactor "PLE", # Pylint error (specific Pylint error rules) "PLW", # Pylint warning (specific Pylint warning rules) "PLC", # Pylint convention (specific Pylint convention rules) - "R", # Refactor (Pylint refactoring suggestions) + "R", # Refactor (Pylint refactoring suggestions) "TID", # TO DO comments - "FAST", # FastAPI - "C4", # List and dict comprehensions - "DJ", # Django - "PIE", # Returns and unecessary returns - "Q", # Double quotes + "FAST",# FastAPI + "C4", # List and dict comprehensions + "DJ", # Django + "PIE", # Returns and unnecessary returns + "Q", # Double quotes "RET", # Fix return statements "PTH", # Enforce pathlib "ARG", # Unused argument "FLY", # Flynt "NPY", # Numpy specific - "PD", # Pandas specific + "PD", # Pandas specific "RUF", # Ruff specific ] From 665d77b9b967c32ccbd68d1f66ab66250f772463 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 28 May 2025 14:32:56 -0400 Subject: [PATCH 34/38] Refactor downloader constants --- climateset/download/constants/cmip6.py | 950 +------------------ climateset/download/constants/cmip6plus.py | 43 +- climateset/download/constants/esgf.py | 8 +- climateset/download/constants/input4mips.py | 11 + configs/downloader/constants/cmip6.yaml | 916 ++++++++++++++++++ configs/downloader/constants/cmip6plus.yaml | 13 + configs/downloader/constants/imput4MIPs.yaml | 2 +- tests/test_download/test_downloader.py | 2 +- 8 files changed, 997 insertions(+), 948 deletions(-) create mode 100644 configs/downloader/constants/cmip6.yaml create mode 100644 configs/downloader/constants/cmip6plus.yaml diff --git a/climateset/download/constants/cmip6.py b/climateset/download/constants/cmip6.py index e32276c..656a29f 100644 --- 
a/climateset/download/constants/cmip6.py +++ b/climateset/download/constants/cmip6.py @@ -1,934 +1,34 @@ # TODO remove raw variables from here +# pylint: disable=C0103 +from dataclasses import dataclass +from typing import Final + +from climateset.utils import get_yaml_config + + +@dataclass(frozen=True) class Cmip6Constants: """ + Dataclass to represent CMIP6 constants that are used by the download module. + Attributes: - NODE_LINK (str): Where the data can be accessed - MODEL_SOURCES (List): Identifiers for supported climate models - VAR_SOURCE_LOOKUP (Dict>): model and raw variables - SUPPORTED_EXPERIMENTS (list): experiments of climate models (runs) that are supported + NODE_LINK : Where the data can be accessed + MODEL_SOURCES : Identifiers for supported climate models + VAR_SOURCE_LOOKUP : model and raw variables + SUPPORTED_EXPERIMENTS : experiments of climate models (runs) that are supported """ - # The values here have been retrieved from here: - # https://wcrp-cmip.org/cmip-data-access/ - - # This entry node link is automatically changing to other nodes - NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" + NODE_LINK: Final[str] + MODEL_SOURCES: Final[tuple[str, ...]] + VAR_SOURCE_LOOKUP: Final[tuple[str, ...]] + SUPPORTED_EXPERIMENTS: Final[tuple[str, ...]] - # Supported Model sources - MODEL_SOURCES = [ - "4AOP-v1-5", - "ACCESS-CM2", - "ACCESS-ESM1-5", - "ACCESS-OM2", - "ACCESS-OM2-025", - "ARTS-2-3", - "AWI-CM-1-1-HR", - "AWI-CM-1-1-LR", - "AWI-CM-1-1-MR", - "AWI-ESM-1-1-LR", - "AWI-ESM-2-1-LR", - "BCC-CSM2-HR", - "BCC-CSM2-MR", - "BCC-ESM1", - "CAM-MPAS-HR", - "CAM-MPAS-LR", - "CAMS-CSM1-0", - "CanESM5", - "CanESM5-1", - "CanESM5-CanOE", - "CAS-ESM2-0", - "CESM1-1-CAM5-CMIP5", - "CESM1-CAM5-SE-HR", - "CESM1-CAM5-SE-LR", - "CESM1-WACCM-SC", - "CESM2", - "CESM2-FV2", - "CESM2-WACCM", - "CESM2-WACCM-FV2", - "CIESM", - "CMCC-CM2-HR4", - "CMCC-CM2-SR5", - "CMCC-CM2-VHR4", - "CMCC-ESM2", - "CNRM-CM6-1", - "CNRM-CM6-1-HR", - "CNRM-ESM2-1", - "E3SM-1-0", - 
"E3SM-1-1", - "E3SM-1-1-ECA", - "E3SM-2-0", - "EC-Earth3", - "EC-Earth3-AerChem", - "EC-Earth3-CC", - "EC-Earth3-GrIS", - "EC-Earth3-HR", - "EC-Earth3-LR", - "EC-Earth3-Veg", - "EC-Earth3-Veg-LR", - "EC-Earth3P", - "EC-Earth3P-HR", - "EC-Earth3P-VHR", - "ECMWF-IFS-HR", - "ECMWF-IFS-LR", - "ECMWF-IFS-MR", - "FGOALS-f3-H", - "FGOALS-f3-L", - "FGOALS-g3", - "FIO-ESM-2-0", - "GFDL-AM4", - "GFDL-CM4", - "GFDL-CM4C192", - "GFDL-ESM2M", - "GFDL-ESM4", - "GFDL-GRTCODE", - "GFDL-OM4p5B", - "GFDL-RFM-DISORT", - "GISS-E2-1-G", - "GISS-E2-1-G-CC", - "GISS-E2-1-H", - "GISS-E2-2-G", - "GISS-E2-2-H", - "GISS-E3-G", - "HadGEM3-GC31-HH", - "HadGEM3-GC31-HM", - "HadGEM3-GC31-LL", - "HadGEM3-GC31-LM", - "HadGEM3-GC31-MH", - "HadGEM3-GC31-MM", - "HiRAM-SIT-HR", - "HiRAM-SIT-LR", - "ICON-ESM-LR", - "IITM-ESM", - "INM-CM4-8", - "INM-CM5-0", - "INM-CM5-H", - "IPSL-CM5A2-INCA", - "IPSL-CM6A-ATM-HR", - "IPSL-CM6A-ATM-ICO-HR", - "IPSL-CM6A-ATM-ICO-LR", - "IPSL-CM6A-ATM-ICO-MR", - "IPSL-CM6A-ATM-ICO-VHR", - "IPSL-CM6A-ATM-LR-REPROBUS", - "IPSL-CM6A-LR", - "IPSL-CM6A-LR-INCA", - "IPSL-CM6A-MR1", - "KACE-1-0-G", - "KIOST-ESM", - "LBLRTM-12-8", - "MCM-UA-1-0", - "MIROC-ES2H", - "MIROC-ES2H-NB", - "MIROC-ES2L", - "MIROC6", - "MPI-ESM-1-2-HAM", - "MPI-ESM1-2-HR", - "MPI-ESM1-2-LR", - "MPI-ESM1-2-XR", - "MRI-AGCM3-2-H", - "MRI-AGCM3-2-S", - "MRI-ESM2-0", - "NESM3", - "NICAM16-7S", - "NICAM16-8S", - "NICAM16-9S", - "NorCPM1", - "NorESM1-F", - "NorESM2-LM", - "NorESM2-MH", - "RRTMG-LW-4-91", - "RRTMG-SW-4-02", - "RTE-RRTMGP-181204", - "SAM0-UNICON", - "TaiESM1", - "TaiESM1-TIMCOM", - "TaiESM1-TIMCOM2", - "UKESM1-0-LL", - "UKESM1-1-LL", - "UKESM1-ice-LL", - "E3SM-2-0-NARRM", - "E3SM-2-1", - "EC-Earth3-ESM-1", - "PCMDI-test-1-0", - ] - VAR_SOURCE_LOOKUP = [ - "ztp", - "zsatcalc", - "zsatarag", - "zostoga", - "zossq", - "zos", - "zoocos", - "zooc", - "zo2min", - "zhalfo", - "zg500", - "zg1000", - "zg100", - "zg10", - "zg", - "zfullo", - "wtd", - "wo", - "wmo", - "wfonocorr", - "wfo", - "wetss", - 
"wetso4", - "wetso2", - "wetlandFrac", - "wetlandCH4", - "wetbc", - "wap500", - "wap", - "vsf", - "volo", - "volcello", - "vo", - "vmo", - "vegHeight", - "va", - "uo", - "umo", - "ua", - "tslsi", - "tsl", - "ts", - "tran", - "tossq", - "tosga", - "tos", - "tob", - "thkcello", - "thetaot700", - "thetaot300", - "thetaot2000", - "thetaot", - "thetaoga", - "thetao", - "tgs", - "tcs", - "tauvo", - "tauv", - "tauuo", - "tauu", - "tasmin", - "tasmax", - "tas", - "talkos", - "talknat", - "talk", - "ta850", - "ta700", - "ta500", - "ta", - "t20d", - "spco2", - "sossq", - "sosga", - "sos", - "sootsn", - "somint", - "soga", - "sob", - "so2", - "so", - "snw", - "sndmasswindrif", - "sndmasssnf", - "sndmasssi", - "sndmassmelt", - "snd", - "snc", - "sivols", - "sivoln", - "sivol", - "siv", - "siu", - "sitimefrac", - "sithick", - "sitemptop", - "sitempsnic", - "sitempbot", - "sistryubot", - "sistrydtop", - "sistrxubot", - "sistrxdtop", - "sispeed", - "sisnthick", - "sisnmass", - "sisnhc", - "sisnconc", - "sirdgthick", - "sirdgconc", - "sipr", - "sios", - "simpconc", - "simass", - "siitdthick", - "siitdsnthick", - "siitdsnconc", - "siitdconc", - "sihc", - "siforcetilty", - "siforcetiltx", - "siforceintstry", - "siforceintstrx", - "siforcecorioly", - "siforcecoriolx", - "siflswutop", - "siflswdtop", - "siflswdbot", - "siflsensupbot", - "siflsenstop", - "sifllwutop", - "sifllwdtop", - "sifllatstop", - "siflfwdrain", - "siflfwbot", - "siflcondtop", - "siflcondbot", - "sifb", - "siextents", - "siextentn", - "sidmasstrany", - "sidmasstranx", - "sidmassth", - "sidmasssi", - "sidmassmelttop", - "sidmassmeltbot", - "sidmasslat", - "sidmassgrowthwat", - "sidmassgrowthbot", - "sidmassevapsubl", - "sidmassdyn", - "sidivvel", - "sidconcth", - "sidconcdyn", - "siconc", - "sicompstren", - "siarean", - "siage", - "si", - "sftof", - "sftlf", - "sftgif", - "sfdsi", - "sfcWind", - "sf6", - "rtmt", - "rsutcsaf", - "rsutcs", - "rsutaf", - "rsut", - "rsuscs", - "rsus", - "rsntds", - "rsdt", - 
"rsdsdiff", - "rsdscs", - "rsds", - "rlutcsaf", - "rlutcs", - "rlutaf", - "rlut", - "rlus", - "rldscs", - "rlds", - "rh", - "reffclwtop", - "ra", - "rMaint", - "rGrowth", - "qgwr", - "pso", - "psl", - "ps", - "prw", - "prveg", - "prsn", - "prra", - "prc", - "pr", - "ppos", - "pp", - "popos", - "pop", - "ponos", - "pon", - "po4os", - "po4", - "phynos", - "phyn", - "phyfeos", - "phyfe", - "phyc", - "phos", - "phnat", - "phalf", - "ph", - "pfull", - "pctisccp", - "pbo", - "orog", - "opottempmint", - "oh", - "od870aer", - "od550ss", - "od550so4", - "od550oa", - "od550lt1aer", - "od550dust", - "od550csaer", - "od550bc", - "od550aerh2o", - "od550aer", - "od440aer", - "obvfsq", - "o3", - "o2satos", - "o2sat", - "o2os", - "o2min", - "o2", - "nppWood", - "nppRoot", - "nppLeaf", - "npp", - "no3os", - "no3", - "nep", - "nbp", - "nVeg", - "nStem", - "nSoil", - "nRoot", - "nMineralNO3", - "nMineralNH4", - "nMineral", - "nLitter", - "nLeaf", - "nLand", - "n2oglobal", - "msftmzmpa", - "msftmz", - "msftmrhompa", - "msftmrho", - "msftbarot", - "mrtws", - "mrsos", - "mrsol", - "mrso", - "mrsll", - "mrsfl", - "mrros", - "mrrob", - "mrro", - "mrlso", - "mrfso", - "mmrss", - "mmrsoa", - "mmrso4", - "mmrpm2p5", - "mmrpm1", - "mmroa", - "mmrdust", - "mmrbc", - "mmraerh2o", - "mlotstsq", - "mlotstmin", - "mlotstmax", - "mlotst", - "mfo", - "masso", - "masscello", - "lwsnl", - "lwp", - "loadss", - "loaddust", - "lai", - "isop", - "intpp", - "intpoc", - "intpn2", - "intdoc", - "intdic", - "huss", - "hus", - "hurs", - "hur", - "hfy", - "hfx", - "hfss", - "hfls", - "hfds", - "hfbasinpmdiff", - "hfbasinpmadv", - "hfbasinpadv", - "hfbasin", - "gpp", - "fsitherm", - "froc", - "frn", - "friver", - "fric", - "frfe", - "ficeberg", - "fgo2", - "fgdms", - "fgco2nat", - "fgco2", - "fVegLitterSenescence", - "fVegLitterMortality", - "fVegLitter", - "fNup", - "fNnetmin", - "fNloss", - "fNleach", - "fNgasNonFire", - "fNgasFire", - "fNgas", - "fNfert", - "fNdep", - "fNProduct", - "fNOx", - "fN2O", - 
"fLuc", - "fLitterFire", - "fHarvestToProduct", - "fHarvest", - "fFireNat", - "fFire", - "fDeforestToProduct", - "fBNF", - "evspsblveg", - "evspsblsoi", - "evspsbl", - "evs", - "esn", - "es", - "epsi100", - "epp100", - "epn100", - "epfe100", - "epcalc100", - "epc100", - "emivoc", - "emiss", - "emiso4", - "emiso2", - "emioa", - "emiisop", - "emidust", - "emidms", - "emibvoc", - "emibc", - "ec", - "dryso4", - "dryso2", - "drybc", - "dpco2", - "dmsos", - "dms", - "dmlt", - "dissocos", - "dissoc", - "dissicos", - "dissicnat", - "dissic", - "dfeos", - "dfe", - "detocos", - "detoc", - "deptho", - "cod", - "co3satcalcos", - "co3satcalc", - "co3sataragos", - "co3satarag", - "co3os", - "co3nat", - "co3", - "co2mass", - "co2", - "clwvi", - "clwmodis", - "clw", - "cltmodis", - "cltisccp", - "cltcalipso", - "clt", - "clmcalipso", - "cllcalipso", - "clivi", - "climodis", - "cli", - "clhcalipso", - "cl", - "chlos", - "chl", - "chepsoa", - "ch4global", - "cfc12global", - "cfc12", - "cfc11global", - "cfc11", - "cdnc", - "cct", - "ccn", - "ccb", - "calcos", - "calc", - "cWood", - "cVeg", - "cStem", - "cSoilSlow", - "cSoilMedium", - "cSoilFast", - "cSoilAbove1m", - "cSoil", - "cRoot", - "cMisc", - "cLitter", - "cLeaf", - "cLand", - "cCwd", - "bsios", - "bsi", - "bldep", - "bfeos", - "bfe", - "basin", - "ares", - "areacello", - "areacella", - "albisccp", - "airmass", - "agessc", - "abs550aer", - ] +_data = get_yaml_config("downloader/constants/cmip6.yaml") - SUPPORTED_EXPERIMENTS = [ - "hist-1950HC", - "lfmip-pdLC", - "ssp126", - "ssp126-ssp370Lu", - "ssp245", - "ssp370", - "ssp370-lowNTCF", - "ssp370-ssp126Lu", - "ssp370SST", - "ssp370SST-lowCH4", - "ssp370SST-lowNTCF", - "ssp370SST-ssp126Lu", - "ssp585", - "hist-resAMO", - "hist-resIPO", - "historical-ext", - "lfmip-initLC", - "lfmip-pdLC-cruNcep", - "lfmip-pdLC-princeton", - "lfmip-pdLC-wfdei", - "lfmip-rmLC", - "lfmip-rmLC-cruNcep", - "lfmip-rmLC-princeton", - "lfmip-rmLC-wfdei", - "pa-futAntSIC", - "pa-futArcSIC", - "pa-pdSIC", 
- "pa-piAntSIC", - "pa-piArcSIC", - "ssp119", - "ssp370pdSST", - "ssp370SST-lowAer", - "ssp370SST-lowBC", - "ssp370SST-lowO3", - "ssp434", - "ssp460", - "dcppC-atl-pacemaker", - "dcppC-pac-pacemaker", - "pa-futAntSIC-ext", - "pa-futArcSIC-ext", - "pa-pdSIC-ext", - "ssp370-lowNTCFCH4", - "ssp370SST-lowNTCFCH4", - "volc-cluster-21C", - "yr2010CO2", - "dcppA-historical-niff", - "1pctCO2", - "1pctCO2-bgc", - "abrupt-4xCO2", - "dcppC-amv-neg", - "dcppC-amv-pos", - "dcppC-atl-control", - "dcppC-ipv-neg", - "dcppC-ipv-pos", - "dcppC-pac-control", - "deforest-globe", - "faf-heat", - "faf-heat-NA50pct", - "faf-stress", - "faf-water", - "G1", - "hist-aer", - "hist-GHG", - "hist-nat", - "hist-noLu", - "hist-piNTCF", - "hist-spAer-all", - "histSST", - "histSST-noLu", - "histSST-piCH4", - "histSST-piNTCF", - "piClim-4xCO2", - "piClim-aer", - "piClim-anthro", - "piClim-CH4", - "piClim-control", - "piClim-ghg", - "piClim-HC", - "piClim-lu", - "piClim-NTCF", - "volc-long-eq", - "volc-pinatubo-full", - "volc-pinatubo-strat", - "volc-pinatubo-surf", - "1pctCO2-rad", - "1pctCO2Ndep", - "1pctCO2Ndep-bgc", - "abrupt-0p5xCO2", - "abrupt-2xCO2", - "abrupt-solm4p", - "abrupt-solp4p", - "dcppC-amv-ExTrop-neg", - "dcppC-amv-ExTrop-pos", - "dcppC-amv-Trop-neg", - "dcppC-amv-Trop-pos", - "dcppC-ipv-NexTrop-neg", - "dcppC-ipv-NexTrop-pos", - "faf-all", - "faf-antwater-stress", - "faf-heat-NA0pct", - "faf-passiveheat", - "hist-bgc", - "hist-piAer", - "hist-spAer-aer", - "hist-stratO3", - "histSST-piAer", - "histSST-piN2O", - "histSST-piO3", - "piClim-2xdust", - "piClim-2xss", - "piClim-BC", - "piClim-histaer", - "piClim-histall", - "piClim-histghg", - "piClim-histnat", - "piClim-N2O", - "piClim-O3", - "piClim-spAer-aer", - "piClim-spAer-anthro", - "piClim-spAer-histaer", - "piClim-spAer-histall", - "piSST-4xCO2-solar", - "volc-cluster-ctrl", - "volc-long-hlN", - "hist-all-aer2", - "hist-all-nat2", - "hist-CO2", - "hist-sol", - "hist-totalO3", - "hist-volc", - "piClim-2xDMS", - "piClim-2xfire", 
- "piClim-2xNOx", - "piClim-2xVOC", - "piClim-NH3", - "piClim-NOx", - "piClim-OC", - "piClim-SO2", - "piClim-VOC", - "volc-long-hlS", - "histSST-1950HC", - "esm-ssp585", - "esm-ssp585-ssp126Lu", - "esm-hist-ext", - "ssp534-over-bgc", - "ssp585-bgc", - "esm-1pct-brch-1000PgC", - "esm-1pct-brch-750PgC", - "esm-1pct-brch-2000PgC", - "esm-hist", - "esm-pi-cdr-pulse", - "esm-pi-CO2pulse", - "esm-1pctCO2", - "esm-bell-750PgC", - "esm-bell-1000PgC", - "esm-bell-2000PgC", - "esm-yr2010CO2-control", - "1pctCO2-4xext", - "1pctCO2-cdr", - "esm-ssp534-over", - "esm-ssp585-ocn-alk", - "esm-ssp585ext", - "esm-ssp585-ocn-alk-stop", - "esm-ssp585-ssp126Lu-ext", - "esm-yr2010CO2-cdr-pulse", - "esm-yr2010CO2-CO2pulse", - "esm-yr2010CO2-noemit", - "amip", - "amip-4xCO2", - "amip-future4K", - "amip-hist", - "amip-p4K", - "aqua-4xCO2", - "aqua-control", - "aqua-p4K", - "highresSST-present", - "ism-ctrl-std", - "ism-pdControl-std", - "ism-piControl-self", - "land-hist", - "land-hist-altStartYear", - "land-noLu", - "land-ssp126", - "land-ssp585", - "lgm", - "lig127k", - "midHolocene", - "midPliocene-eoi400", - "omip1", - "past1000", - "piControl-withism", - "rad-irf", - "a4SST", - "a4SSTice", - "a4SSTice-4xCO2", - "amip-a4SST-4xCO2", - "amip-lfmip-pdLC", - "amip-lfmip-pObs", - "amip-lfmip-rmLC", - "amip-lwoff", - "amip-m4K", - "amip-p4K-lwoff", - "amip-piForcing", - "aqua-control-lwoff", - "aqua-p4K-lwoff", - "dcppA-assim", - "esm-piControl-spinup", - "land-cClim", - "land-cCO2", - "land-crop-grass", - "land-crop-noFert", - "land-crop-noIrrig", - "land-crop-noIrrigFert", - "land-hist-altLu1", - "land-hist-altLu2", - "land-hist-cruNcep", - "land-hist-princeton", - "land-hist-wfdei", - "land-noFire", - "land-noPasture", - "land-noShiftCultivate", - "land-noWoodHarv", - "land-ssp434", - "omip1-spunup", - "past1000-solaronly", - "past1000-volconly", - "piControl-spinup", - "piControl-spinup-cmip5", - "piSST", - "piSST-4xCO2", - "piSST-4xCO2-rad", - "piSST-pxK", - "spinup-1950", - "amip-hld", 
- "amip-TIP", - "amip-TIP-nosh", - "control-slab", - "dcppC-atl-spg", - "esm-past1000", - "ism-lig127k-std", - "omip2", - "omip2-spunup", - "past2k", - "esm-piControl", - "historical", - "historical-cmip5", - "hist-aer-cmip5", - "hist-GHG-cmip5", - "hist-nat-cmip5", - "piControl", - "piControl-cmip5", - "ssp245-aer", - "ssp245-cov-strgreen", - "ssp245-covid", - "ssp245-cov-aer", - "ssp245-cov-fossil", - "ssp245-cov-GHG", - "ssp245-cov-modgreen", - "ssp245-GHG", - "ssp245-nat", - "ssp245-stratO3", - "dcppA-hindcast", - "dcppB-forecast", - "dcppC-forecast-addPinatubo", - "dcppC-hindcast-noPinatubo", - "dcppC-hindcast-noAgung", - "dcppC-hindcast-noElChichon", - "dcppC-forecast-addAgung", - "dcppC-forecast-addElChichon", - "dcppA-hindcast-niff", - "futureSST-4xCO2-solar", - "G6solar", - "G6sulfur", - "G6SST1", - "G7cirrus", - "G7SST1-cirrus", - "ssp534-over", - "G6SST2-solar", - "G6SST2-sulfur", - "G7SST2-cirrus", - "control-1950", - "hist-1950", - "highres-future", - "highresSST-4xCO2", - "highresSST-future", - "highresSST-LAI", - "highresSST-p4K", - "highresSST-smoothed", - "1pctCO2to4x-withism", - "historical-withism", - "ism-1pctCO2to4x-self", - "ism-historical-self", - "ism-1pctCO2to4x-std", - "ism-historical-std", - "ism-asmb-std", - "ism-bsmb-std", - "ism-amip-std", - "ism-ssp585-self", - "ism-ssp585-std", - "ssp585-withism", - "pdSST-futAntSIC", - "pdSST-futArcSIC", - "pdSST-pdSIC", - "pdSST-piAntSIC", - "pdSST-piArcSIC", - "piSST-pdSIC", - "futSST-pdSIC", - "piSST-piSIC", - "amip-climSIC", - "amip-climSST", - "modelSST-futArcSIC", - "modelSST-pdSIC", - "pdSST-futArcSICSIT", - "pdSST-futBKSeasSIC", - "pdSST-futOkhotskSIC", - "pdSST-pdSICSIT", - "rcp26-cmip5", - "rcp45-cmip5", - "rcp60-cmip5", - "rcp85-cmip5", - "volc-cluster-mill", - "volc-pinatubo-slab", - ] +CMIP6_CONSTANTS = Cmip6Constants( + NODE_LINK=_data["node_link"], + MODEL_SOURCES=tuple(_data["model_sources"]), + SUPPORTED_EXPERIMENTS=tuple(_data["supported_experiments"]), + 
VAR_SOURCE_LOOKUP=tuple(_data["var_source_lookup"]), +) diff --git a/climateset/download/constants/cmip6plus.py b/climateset/download/constants/cmip6plus.py index a883cd1..d725342 100644 --- a/climateset/download/constants/cmip6plus.py +++ b/climateset/download/constants/cmip6plus.py @@ -1,26 +1,35 @@ +# pylint: disable=C0103 +from dataclasses import dataclass +from typing import Final + +from climateset.utils import get_yaml_config + # TODO remove raw variables from here + + +@dataclass(frozen=True) class Cmip6plusConstants: """ + Dataclass to represent CMIP6PLUS constants that are used by the download module. + Attributes: - NODE_LINK (str): Where the data can be accessed - MODEL_SOURCES (List): Identifiers for supported climate models - VAR_SOURCE_LOOKUP (Dict>): model and raw variables - SUPPORTED_EXPERIMENTS (list): experiments of climate models (runs) that are supported + NODE_LINK : Where the data can be accessed + MODEL_SOURCES : Identifiers for supported climate models + VAR_SOURCE_LOOKUP : model and raw variables + SUPPORTED_EXPERIMENTS : experiments of climate models (runs) that are supported """ - NODE_LINK = "http://esgf-data2.llnl.gov" + NODE_LINK: Final[str] + MODEL_SOURCES: Final[tuple[str, ...]] + VAR_SOURCE_LOOKUP: Final[tuple[str, ...]] + SUPPORTED_EXPERIMENTS: Final[tuple[str, ...]] - MODEL_SOURCES = [ - "HasGEM3-GC31-LL", - ] - VAR_SOURCE_LOOKUP = [ - "areacella", - "mrsofc", - ] +_data = get_yaml_config("downloader/constants/cmip6plus.yaml") - SUPPORTED_EXPERIMENTS = [ - "hist-lu", - "hist-piAer", - "hist-piVolc", - ] +CMIP6PLUS_CONSTANTS = Cmip6plusConstants( + NODE_LINK=_data["node_link"], + MODEL_SOURCES=tuple(_data["model_sources"]), + SUPPORTED_EXPERIMENTS=tuple(_data["supported_experiments"]), + VAR_SOURCE_LOOKUP=tuple(_data["var_source_lookup"]), +) diff --git a/climateset/download/constants/esgf.py b/climateset/download/constants/esgf.py index 14080d6..00ebc92 100644 --- a/climateset/download/constants/esgf.py +++ 
b/climateset/download/constants/esgf.py @@ -1,5 +1,5 @@ -from .cmip6 import Cmip6Constants -from .cmip6plus import Cmip6plusConstants +from .cmip6 import CMIP6_CONSTANTS +from .cmip6plus import CMIP6PLUS_CONSTANTS from .input4mips import INPUT4MIPS_CONSTANTS CMIP6 = "CMIP6" @@ -11,8 +11,8 @@ # constant classes for esgf projects implemented here # add your own esgf project for downloading to download/constants/ and add the constant class to the dict and lists here ESGF_PROJECTS_CONSTANTS = { - CMIP6: Cmip6Constants, - CMIP6PLUS: Cmip6plusConstants, + CMIP6: CMIP6_CONSTANTS, + CMIP6PLUS: CMIP6PLUS_CONSTANTS, INPUT4MIPS: INPUT4MIPS_CONSTANTS, } diff --git a/climateset/download/constants/input4mips.py b/climateset/download/constants/input4mips.py index 9099191..7c6643a 100644 --- a/climateset/download/constants/input4mips.py +++ b/climateset/download/constants/input4mips.py @@ -10,6 +10,17 @@ @dataclass(frozen=True) class Input4MIPSConstants: + """ + Data class to represent Input4MIPS constants that are used by the download module. 
+ + Attributes: + NODE_LINK : Node link is used to run an ESGF search + EMISSIONS_ENDINGS : File endings for emission variables + META_ENDINGS_PRC : File endings for PRC meta variables + META_ENDINGS_SHARE : File endings for SHARE meta variables + VAR_SOURCE_LOOKUP : Model and raw variables + """ + NODE_LINK: Final[str] EMISSIONS_ENDINGS: Final[tuple[str, ...]] META_ENDINGS_PRC: Final[tuple[str, ...]] diff --git a/configs/downloader/constants/cmip6.yaml b/configs/downloader/constants/cmip6.yaml new file mode 100644 index 0000000..374756c --- /dev/null +++ b/configs/downloader/constants/cmip6.yaml @@ -0,0 +1,916 @@ +node_link: "https://esgf-node.llnl.gov/esg-search/" + +model_sources: + - "4AOP-v1-5" + - "ACCESS-CM2" + - "ACCESS-ESM1-5" + - "ACCESS-OM2" + - "ACCESS-OM2-025" + - "ARTS-2-3" + - "AWI-CM-1-1-HR" + - "AWI-CM-1-1-LR" + - "AWI-CM-1-1-MR" + - "AWI-ESM-1-1-LR" + - "AWI-ESM-2-1-LR" + - "BCC-CSM2-HR" + - "BCC-CSM2-MR" + - "BCC-ESM1" + - "CAM-MPAS-HR" + - "CAM-MPAS-LR" + - "CAMS-CSM1-0" + - "CanESM5" + - "CanESM5-1" + - "CanESM5-CanOE" + - "CAS-ESM2-0" + - "CESM1-1-CAM5-CMIP5" + - "CESM1-CAM5-SE-HR" + - "CESM1-CAM5-SE-LR" + - "CESM1-WACCM-SC" + - "CESM2" + - "CESM2-FV2" + - "CESM2-WACCM" + - "CESM2-WACCM-FV2" + - "CIESM" + - "CMCC-CM2-HR4" + - "CMCC-CM2-SR5" + - "CMCC-CM2-VHR4" + - "CMCC-ESM2" + - "CNRM-CM6-1" + - "CNRM-CM6-1-HR" + - "CNRM-ESM2-1" + - "E3SM-1-0" + - "E3SM-1-1" + - "E3SM-1-1-ECA" + - "E3SM-2-0" + - "EC-Earth3" + - "EC-Earth3-AerChem" + - "EC-Earth3-CC" + - "EC-Earth3-GrIS" + - "EC-Earth3-HR" + - "EC-Earth3-LR" + - "EC-Earth3-Veg" + - "EC-Earth3-Veg-LR" + - "EC-Earth3P" + - "EC-Earth3P-HR" + - "EC-Earth3P-VHR" + - "ECMWF-IFS-HR" + - "ECMWF-IFS-LR" + - "ECMWF-IFS-MR" + - "FGOALS-f3-H" + - "FGOALS-f3-L" + - "FGOALS-g3" + - "FIO-ESM-2-0" + - "GFDL-AM4" + - "GFDL-CM4" + - "GFDL-CM4C192" + - "GFDL-ESM2M" + - "GFDL-ESM4" + - "GFDL-GRTCODE" + - "GFDL-OM4p5B" + - "GFDL-RFM-DISORT" + - "GISS-E2-1-G" + - "GISS-E2-1-G-CC" + - "GISS-E2-1-H" + - "GISS-E2-2-G" 
+ - "GISS-E2-2-H" + - "GISS-E3-G" + - "HadGEM3-GC31-HH" + - "HadGEM3-GC31-HM" + - "HadGEM3-GC31-LL" + - "HadGEM3-GC31-LM" + - "HadGEM3-GC31-MH" + - "HadGEM3-GC31-MM" + - "HiRAM-SIT-HR" + - "HiRAM-SIT-LR" + - "ICON-ESM-LR" + - "IITM-ESM" + - "INM-CM4-8" + - "INM-CM5-0" + - "INM-CM5-H" + - "IPSL-CM5A2-INCA" + - "IPSL-CM6A-ATM-HR" + - "IPSL-CM6A-ATM-ICO-HR" + - "IPSL-CM6A-ATM-ICO-LR" + - "IPSL-CM6A-ATM-ICO-MR" + - "IPSL-CM6A-ATM-ICO-VHR" + - "IPSL-CM6A-ATM-LR-REPROBUS" + - "IPSL-CM6A-LR" + - "IPSL-CM6A-LR-INCA" + - "IPSL-CM6A-MR1" + - "KACE-1-0-G" + - "KIOST-ESM" + - "LBLRTM-12-8" + - "MCM-UA-1-0" + - "MIROC-ES2H" + - "MIROC-ES2H-NB" + - "MIROC-ES2L" + - "MIROC6" + - "MPI-ESM-1-2-HAM" + - "MPI-ESM1-2-HR" + - "MPI-ESM1-2-LR" + - "MPI-ESM1-2-XR" + - "MRI-AGCM3-2-H" + - "MRI-AGCM3-2-S" + - "MRI-ESM2-0" + - "NESM3" + - "NICAM16-7S" + - "NICAM16-8S" + - "NICAM16-9S" + - "NorCPM1" + - "NorESM1-F" + - "NorESM2-LM" + - "NorESM2-MH" + - "RRTMG-LW-4-91" + - "RRTMG-SW-4-02" + - "RTE-RRTMGP-181204" + - "SAM0-UNICON" + - "TaiESM1" + - "TaiESM1-TIMCOM" + - "TaiESM1-TIMCOM2" + - "UKESM1-0-LL" + - "UKESM1-1-LL" + - "UKESM1-ice-LL" + - "E3SM-2-0-NARRM" + - "E3SM-2-1" + - "EC-Earth3-ESM-1" + - "PCMDI-test-1-0" + +var_source_lookup: + - "ztp" + - "zsatcalc" + - "zsatarag" + - "zostoga" + - "zossq" + - "zos" + - "zoocos" + - "zooc" + - "zo2min" + - "zhalfo" + - "zg500" + - "zg1000" + - "zg100" + - "zg10" + - "zg" + - "zfullo" + - "wtd" + - "wo" + - "wmo" + - "wfonocorr" + - "wfo" + - "wetss" + - "wetso4" + - "wetso2" + - "wetlandFrac" + - "wetlandCH4" + - "wetbc" + - "wap500" + - "wap" + - "vsf" + - "volo" + - "volcello" + - "vo" + - "vmo" + - "vegHeight" + - "va" + - "uo" + - "umo" + - "ua" + - "tslsi" + - "tsl" + - "ts" + - "tran" + - "tossq" + - "tosga" + - "tos" + - "tob" + - "thkcello" + - "thetaot700" + - "thetaot300" + - "thetaot2000" + - "thetaot" + - "thetaoga" + - "thetao" + - "tgs" + - "tcs" + - "tauvo" + - "tauv" + - "tauuo" + - "tauu" + - "tasmin" + - "tasmax" + - "tas" + - 
"talkos" + - "talknat" + - "talk" + - "ta850" + - "ta700" + - "ta500" + - "ta" + - "t20d" + - "spco2" + - "sossq" + - "sosga" + - "sos" + - "sootsn" + - "somint" + - "soga" + - "sob" + - "so2" + - "so" + - "snw" + - "sndmasswindrif" + - "sndmasssnf" + - "sndmasssi" + - "sndmassmelt" + - "snd" + - "snc" + - "sivols" + - "sivoln" + - "sivol" + - "siv" + - "siu" + - "sitimefrac" + - "sithick" + - "sitemptop" + - "sitempsnic" + - "sitempbot" + - "sistryubot" + - "sistrydtop" + - "sistrxubot" + - "sistrxdtop" + - "sispeed" + - "sisnthick" + - "sisnmass" + - "sisnhc" + - "sisnconc" + - "sirdgthick" + - "sirdgconc" + - "sipr" + - "sios" + - "simpconc" + - "simass" + - "siitdthick" + - "siitdsnthick" + - "siitdsnconc" + - "siitdconc" + - "sihc" + - "siforcetilty" + - "siforcetiltx" + - "siforceintstry" + - "siforceintstrx" + - "siforcecorioly" + - "siforcecoriolx" + - "siflswutop" + - "siflswdtop" + - "siflswdbot" + - "siflsensupbot" + - "siflsenstop" + - "sifllwutop" + - "sifllwdtop" + - "sifllatstop" + - "siflfwdrain" + - "siflfwbot" + - "siflcondtop" + - "siflcondbot" + - "sifb" + - "siextents" + - "siextentn" + - "sidmasstrany" + - "sidmasstranx" + - "sidmassth" + - "sidmasssi" + - "sidmassmelttop" + - "sidmassmeltbot" + - "sidmasslat" + - "sidmassgrowthwat" + - "sidmassgrowthbot" + - "sidmassevapsubl" + - "sidmassdyn" + - "sidivvel" + - "sidconcth" + - "sidconcdyn" + - "siconc" + - "sicompstren" + - "siarean" + - "siage" + - "si" + - "sftof" + - "sftlf" + - "sftgif" + - "sfdsi" + - "sfcWind" + - "sf6" + - "rtmt" + - "rsutcsaf" + - "rsutcs" + - "rsutaf" + - "rsut" + - "rsuscs" + - "rsus" + - "rsntds" + - "rsdt" + - "rsdsdiff" + - "rsdscs" + - "rsds" + - "rlutcsaf" + - "rlutcs" + - "rlutaf" + - "rlut" + - "rlus" + - "rldscs" + - "rlds" + - "rh" + - "reffclwtop" + - "ra" + - "rMaint" + - "rGrowth" + - "qgwr" + - "pso" + - "psl" + - "ps" + - "prw" + - "prveg" + - "prsn" + - "prra" + - "prc" + - "pr" + - "ppos" + - "pp" + - "popos" + - "pop" + - "ponos" + - "pon" + - 
"po4os" + - "po4" + - "phynos" + - "phyn" + - "phyfeos" + - "phyfe" + - "phyc" + - "phos" + - "phnat" + - "phalf" + - "ph" + - "pfull" + - "pctisccp" + - "pbo" + - "orog" + - "opottempmint" + - "oh" + - "od870aer" + - "od550ss" + - "od550so4" + - "od550oa" + - "od550lt1aer" + - "od550dust" + - "od550csaer" + - "od550bc" + - "od550aerh2o" + - "od550aer" + - "od440aer" + - "obvfsq" + - "o3" + - "o2satos" + - "o2sat" + - "o2os" + - "o2min" + - "o2" + - "nppWood" + - "nppRoot" + - "nppLeaf" + - "npp" + - "no3os" + - "no3" + - "nep" + - "nbp" + - "nVeg" + - "nStem" + - "nSoil" + - "nRoot" + - "nMineralNO3" + - "nMineralNH4" + - "nMineral" + - "nLitter" + - "nLeaf" + - "nLand" + - "n2oglobal" + - "msftmzmpa" + - "msftmz" + - "msftmrhompa" + - "msftmrho" + - "msftbarot" + - "mrtws" + - "mrsos" + - "mrsol" + - "mrso" + - "mrsll" + - "mrsfl" + - "mrros" + - "mrrob" + - "mrro" + - "mrlso" + - "mrfso" + - "mmrss" + - "mmrsoa" + - "mmrso4" + - "mmrpm2p5" + - "mmrpm1" + - "mmroa" + - "mmrdust" + - "mmrbc" + - "mmraerh2o" + - "mlotstsq" + - "mlotstmin" + - "mlotstmax" + - "mlotst" + - "mfo" + - "masso" + - "masscello" + - "lwsnl" + - "lwp" + - "loadss" + - "loaddust" + - "lai" + - "isop" + - "intpp" + - "intpoc" + - "intpn2" + - "intdoc" + - "intdic" + - "huss" + - "hus" + - "hurs" + - "hur" + - "hfy" + - "hfx" + - "hfss" + - "hfls" + - "hfds" + - "hfbasinpmdiff" + - "hfbasinpmadv" + - "hfbasinpadv" + - "hfbasin" + - "gpp" + - "fsitherm" + - "froc" + - "frn" + - "friver" + - "fric" + - "frfe" + - "ficeberg" + - "fgo2" + - "fgdms" + - "fgco2nat" + - "fgco2" + - "fVegLitterSenescence" + - "fVegLitterMortality" + - "fVegLitter" + - "fNup" + - "fNnetmin" + - "fNloss" + - "fNleach" + - "fNgasNonFire" + - "fNgasFire" + - "fNgas" + - "fNfert" + - "fNdep" + - "fNProduct" + - "fNOx" + - "fN2O" + - "fLuc" + - "fLitterFire" + - "fHarvestToProduct" + - "fHarvest" + - "fFireNat" + - "fFire" + - "fDeforestToProduct" + - "fBNF" + - "evspsblveg" + - "evspsblsoi" + - "evspsbl" + - "evs" + - 
"esn" + - "es" + - "epsi100" + - "epp100" + - "epn100" + - "epfe100" + - "epcalc100" + - "epc100" + - "emivoc" + - "emiss" + - "emiso4" + - "emiso2" + - "emioa" + - "emiisop" + - "emidust" + - "emidms" + - "emibvoc" + - "emibc" + - "ec" + - "dryso4" + - "dryso2" + - "drybc" + - "dpco2" + - "dmsos" + - "dms" + - "dmlt" + - "dissocos" + - "dissoc" + - "dissicos" + - "dissicnat" + - "dissic" + - "dfeos" + - "dfe" + - "detocos" + - "detoc" + - "deptho" + - "cod" + - "co3satcalcos" + - "co3satcalc" + - "co3sataragos" + - "co3satarag" + - "co3os" + - "co3nat" + - "co3" + - "co2mass" + - "co2" + - "clwvi" + - "clwmodis" + - "clw" + - "cltmodis" + - "cltisccp" + - "cltcalipso" + - "clt" + - "clmcalipso" + - "cllcalipso" + - "clivi" + - "climodis" + - "cli" + - "clhcalipso" + - "cl" + - "chlos" + - "chl" + - "chepsoa" + - "ch4global" + - "cfc12global" + - "cfc12" + - "cfc11global" + - "cfc11" + - "cdnc" + - "cct" + - "ccn" + - "ccb" + - "calcos" + - "calc" + - "cWood" + - "cVeg" + - "cStem" + - "cSoilSlow" + - "cSoilMedium" + - "cSoilFast" + - "cSoilAbove1m" + - "cSoil" + - "cRoot" + - "cMisc" + - "cLitter" + - "cLeaf" + - "cLand" + - "cCwd" + - "bsios" + - "bsi" + - "bldep" + - "bfeos" + - "bfe" + - "basin" + - "ares" + - "areacello" + - "areacella" + - "albisccp" + - "airmass" + - "agessc" + - "abs550aer" + +supported_experiments: + - "hist-1950HC" + - "lfmip-pdLC" + - "ssp126" + - "ssp126-ssp370Lu" + - "ssp245" + - "ssp370" + - "ssp370-lowNTCF" + - "ssp370-ssp126Lu" + - "ssp370SST" + - "ssp370SST-lowCH4" + - "ssp370SST-lowNTCF" + - "ssp370SST-ssp126Lu" + - "ssp585" + - "hist-resAMO" + - "hist-resIPO" + - "historical-ext" + - "lfmip-initLC" + - "lfmip-pdLC-cruNcep" + - "lfmip-pdLC-princeton" + - "lfmip-pdLC-wfdei" + - "lfmip-rmLC" + - "lfmip-rmLC-cruNcep" + - "lfmip-rmLC-princeton" + - "lfmip-rmLC-wfdei" + - "pa-futAntSIC" + - "pa-futArcSIC" + - "pa-pdSIC" + - "pa-piAntSIC" + - "pa-piArcSIC" + - "ssp119" + - "ssp370pdSST" + - "ssp370SST-lowAer" + - "ssp370SST-lowBC" + - 
"ssp370SST-lowO3" + - "ssp434" + - "ssp460" + - "dcppC-atl-pacemaker" + - "dcppC-pac-pacemaker" + - "pa-futAntSIC-ext" + - "pa-futArcSIC-ext" + - "pa-pdSIC-ext" + - "ssp370-lowNTCFCH4" + - "ssp370SST-lowNTCFCH4" + - "volc-cluster-21C" + - "yr2010CO2" + - "dcppA-historical-niff" + - "1pctCO2" + - "1pctCO2-bgc" + - "abrupt-4xCO2" + - "dcppC-amv-neg" + - "dcppC-amv-pos" + - "dcppC-atl-control" + - "dcppC-ipv-neg" + - "dcppC-ipv-pos" + - "dcppC-pac-control" + - "deforest-globe" + - "faf-heat" + - "faf-heat-NA50pct" + - "faf-stress" + - "faf-water" + - "G1" + - "hist-aer" + - "hist-GHG" + - "hist-nat" + - "hist-noLu" + - "hist-piNTCF" + - "hist-spAer-all" + - "histSST" + - "histSST-noLu" + - "histSST-piCH4" + - "histSST-piNTCF" + - "piClim-4xCO2" + - "piClim-aer" + - "piClim-anthro" + - "piClim-CH4" + - "piClim-control" + - "piClim-ghg" + - "piClim-HC" + - "piClim-lu" + - "piClim-NTCF" + - "volc-long-eq" + - "volc-pinatubo-full" + - "volc-pinatubo-strat" + - "volc-pinatubo-surf" + - "1pctCO2-rad" + - "1pctCO2Ndep" + - "1pctCO2Ndep-bgc" + - "abrupt-0p5xCO2" + - "abrupt-2xCO2" + - "abrupt-solm4p" + - "abrupt-solp4p" + - "dcppC-amv-ExTrop-neg" + - "dcppC-amv-ExTrop-pos" + - "dcppC-amv-Trop-neg" + - "dcppC-amv-Trop-pos" + - "dcppC-ipv-NexTrop-neg" + - "dcppC-ipv-NexTrop-pos" + - "faf-all" + - "faf-antwater-stress" + - "faf-heat-NA0pct" + - "faf-passiveheat" + - "hist-bgc" + - "hist-piAer" + - "hist-spAer-aer" + - "hist-stratO3" + - "histSST-piAer" + - "histSST-piN2O" + - "histSST-piO3" + - "piClim-2xdust" + - "piClim-2xss" + - "piClim-BC" + - "piClim-histaer" + - "piClim-histall" + - "piClim-histghg" + - "piClim-histnat" + - "piClim-N2O" + - "piClim-O3" + - "piClim-spAer-aer" + - "piClim-spAer-anthro" + - "piClim-spAer-histaer" + - "piClim-spAer-histall" + - "piSST-4xCO2-solar" + - "volc-cluster-ctrl" + - "volc-long-hlN" + - "hist-all-aer2" + - "hist-all-nat2" + - "hist-CO2" + - "hist-sol" + - "hist-totalO3" + - "hist-volc" + - "piClim-2xDMS" + - "piClim-2xfire" + - 
"piClim-2xNOx" + - "piClim-2xVOC" + - "piClim-NH3" + - "piClim-NOx" + - "piClim-OC" + - "piClim-SO2" + - "piClim-VOC" + - "volc-long-hlS" + - "histSST-1950HC" + - "esm-ssp585" + - "esm-ssp585-ssp126Lu" + - "esm-hist-ext" + - "ssp534-over-bgc" + - "ssp585-bgc" + - "esm-1pct-brch-1000PgC" + - "esm-1pct-brch-750PgC" + - "esm-1pct-brch-2000PgC" + - "esm-hist" + - "esm-pi-cdr-pulse" + - "esm-pi-CO2pulse" + - "esm-1pctCO2" + - "esm-bell-750PgC" + - "esm-bell-1000PgC" + - "esm-bell-2000PgC" + - "esm-yr2010CO2-control" + - "1pctCO2-4xext" + - "1pctCO2-cdr" + - "esm-ssp534-over" + - "esm-ssp585-ocn-alk" + - "esm-ssp585ext" + - "esm-ssp585-ocn-alk-stop" + - "esm-ssp585-ssp126Lu-ext" + - "esm-yr2010CO2-cdr-pulse" + - "esm-yr2010CO2-CO2pulse" + - "esm-yr2010CO2-noemit" + - "amip" + - "amip-4xCO2" + - "amip-future4K" + - "amip-hist" + - "amip-p4K" + - "aqua-4xCO2" + - "aqua-control" + - "aqua-p4K" + - "highresSST-present" + - "ism-ctrl-std" + - "ism-pdControl-std" + - "ism-piControl-self" + - "land-hist" + - "land-hist-altStartYear" + - "land-noLu" + - "land-ssp126" + - "land-ssp585" + - "lgm" + - "lig127k" + - "midHolocene" + - "midPliocene-eoi400" + - "omip1" + - "past1000" + - "piControl-withism" + - "rad-irf" + - "a4SST" + - "a4SSTice" + - "a4SSTice-4xCO2" + - "amip-a4SST-4xCO2" + - "amip-lfmip-pdLC" + - "amip-lfmip-pObs" + - "amip-lfmip-rmLC" + - "amip-lwoff" + - "amip-m4K" + - "amip-p4K-lwoff" + - "amip-piForcing" + - "aqua-control-lwoff" + - "aqua-p4K-lwoff" + - "dcppA-assim" + - "esm-piControl-spinup" + - "land-cClim" + - "land-cCO2" + - "land-crop-grass" + - "land-crop-noFert" + - "land-crop-noIrrig" + - "land-crop-noIrrigFert" + - "land-hist-altLu1" + - "land-hist-altLu2" + - "land-hist-cruNcep" + - "land-hist-princeton" + - "land-hist-wfdei" + - "land-noFire" + - "land-noPasture" + - "land-noShiftCultivate" + - "land-noWoodHarv" + - "land-ssp434" + - "omip1-spunup" + - "past1000-solaronly" + - "past1000-volconly" + - "piControl-spinup" + - "piControl-spinup-cmip5" + 
- "piSST" + - "piSST-4xCO2" + - "piSST-4xCO2-rad" + - "piSST-pxK" + - "spinup-1950" + - "amip-hld" + - "amip-TIP" + - "amip-TIP-nosh" + - "control-slab" + - "dcppC-atl-spg" + - "esm-past1000" + - "ism-lig127k-std" + - "omip2" + - "omip2-spunup" + - "past2k" + - "esm-piControl" + - "historical" + - "historical-cmip5" + - "hist-aer-cmip5" + - "hist-GHG-cmip5" + - "hist-nat-cmip5" + - "piControl" + - "piControl-cmip5" + - "ssp245-aer" + - "ssp245-cov-strgreen" + - "ssp245-covid" + - "ssp245-cov-aer" + - "ssp245-cov-fossil" + - "ssp245-cov-GHG" + - "ssp245-cov-modgreen" + - "ssp245-GHG" + - "ssp245-nat" + - "ssp245-stratO3" + - "dcppA-hindcast" + - "dcppB-forecast" + - "dcppC-forecast-addPinatubo" + - "dcppC-hindcast-noPinatubo" + - "dcppC-hindcast-noAgung" + - "dcppC-hindcast-noElChichon" + - "dcppC-forecast-addAgung" + - "dcppC-forecast-addElChichon" + - "dcppA-hindcast-niff" + - "futureSST-4xCO2-solar" + - "G6solar" + - "G6sulfur" + - "G6SST1" + - "G7cirrus" + - "G7SST1-cirrus" + - "ssp534-over" + - "G6SST2-solar" + - "G6SST2-sulfur" + - "G7SST2-cirrus" + - "control-1950" + - "hist-1950" + - "highres-future" + - "highresSST-4xCO2" + - "highresSST-future" + - "highresSST-LAI" + - "highresSST-p4K" + - "highresSST-smoothed" + - "1pctCO2to4x-withism" + - "historical-withism" + - "ism-1pctCO2to4x-self" + - "ism-historical-self" + - "ism-1pctCO2to4x-std" + - "ism-historical-std" + - "ism-asmb-std" + - "ism-bsmb-std" + - "ism-amip-std" + - "ism-ssp585-self" + - "ism-ssp585-std" + - "ssp585-withism" + - "pdSST-futAntSIC" + - "pdSST-futArcSIC" + - "pdSST-pdSIC" + - "pdSST-piAntSIC" + - "pdSST-piArcSIC" + - "piSST-pdSIC" + - "futSST-pdSIC" + - "piSST-piSIC" + - "amip-climSIC" + - "amip-climSST" + - "modelSST-futArcSIC" + - "modelSST-pdSIC" + - "pdSST-futArcSICSIT" + - "pdSST-futBKSeasSIC" + - "pdSST-futOkhotskSIC" + - "pdSST-pdSICSIT" + - "rcp26-cmip5" + - "rcp45-cmip5" + - "rcp60-cmip5" + - "rcp85-cmip5" + - "volc-cluster-mill" + - "volc-pinatubo-slab" \ No newline at end of 
file diff --git a/configs/downloader/constants/cmip6plus.yaml b/configs/downloader/constants/cmip6plus.yaml new file mode 100644 index 0000000..e7e255d --- /dev/null +++ b/configs/downloader/constants/cmip6plus.yaml @@ -0,0 +1,13 @@ +node_link: "http://esgf-data2.llnl.gov" + +model_sources: + - "HasGEM3-GC31-LL" + +var_source_lookup: + - "areacella" + - "mrsofc" + +supported_experiments: + - "hist-lu" + - "hist-piAer" + - "hist-piVolc" \ No newline at end of file diff --git a/configs/downloader/constants/imput4MIPs.yaml b/configs/downloader/constants/imput4MIPs.yaml index cd00b19..4915f04 100644 --- a/configs/downloader/constants/imput4MIPs.yaml +++ b/configs/downloader/constants/imput4MIPs.yaml @@ -1,4 +1,4 @@ -node_link: "http://esgf-node.llnl.gov/esg-search/" +node_link: "https://esgf-node.llnl.gov/esg-search/" emissions_endings: - "_em_openburning" diff --git a/tests/test_download/test_downloader.py b/tests/test_download/test_downloader.py index 4debd59..480f47c 100644 --- a/tests/test_download/test_downloader.py +++ b/tests/test_download/test_downloader.py @@ -135,7 +135,7 @@ def test_downloader_variables(input4mips_downloader_object, cmip6_downloader_obj def test_downloader_model_params(cmip6_downloader_object): - assert cmip6_downloader_object.config.node_link == "http://esgf-node.llnl.gov/esg-search/" + assert cmip6_downloader_object.config.node_link == "https://esgf-node.llnl.gov/esg-search/" def test_download_raw_input(input4mips_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var): From 67173933e464958a645300d34ac2f5f6dd99f7f3 Mon Sep 17 00:00:00 2001 From: f-PLT Date: Wed, 28 May 2025 15:46:21 -0400 Subject: [PATCH 35/38] Refactor downloader_config from Abstract to base inheritance --- climateset/download/downloader_config.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/climateset/download/downloader_config.py b/climateset/download/downloader_config.py index 1f19a1b..4d3eca9 100644 --- 
a/climateset/download/downloader_config.py +++ b/climateset/download/downloader_config.py @@ -1,7 +1,6 @@ import copy import inspect import logging -from abc import ABC from pathlib import Path import yaml @@ -21,7 +20,7 @@ AVAILABLE_CONFIGS = frozenset([CMIP6, INPUT4MIPS]) -class AbstractDownloaderConfig(ABC): +class BaseDownloaderConfig: def __init__( self, project: str, @@ -131,7 +130,7 @@ def add_to_config_file(self, config_file_name: str, config_path: str | Path = CO yaml.dump(existing_config, config_file, indent=2) -class Input4mipsDownloaderConfig(AbstractDownloaderConfig): +class Input4mipsDownloaderConfig(BaseDownloaderConfig): def __init__( self, project: str, @@ -216,7 +215,7 @@ def _generate_plain_emission_vars(self): ] -class CMIP6DownloaderConfig(AbstractDownloaderConfig): +class CMIP6DownloaderConfig(BaseDownloaderConfig): def __init__( self, project: str, From ee52825abe808e6ade359fd764319a9b6d86b49c Mon Sep 17 00:00:00 2001 From: Francis Pelletier <32526367+f-PLT@users.noreply.github.com> Date: Sun, 1 Jun 2025 12:18:05 -0400 Subject: [PATCH 36/38] Update .pre-commit-config.yaml --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8e0565d..6874901 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: "^docs/|/migrations/" +exclude: ^docs/|/migrations/|Makefile* default_stages: [commit] repos: From cf437bd92e15ca0285d0f082461d2e9ebf3efcdf Mon Sep 17 00:00:00 2001 From: f-PLT Date: Tue, 3 Jun 2025 09:27:25 -0400 Subject: [PATCH 37/38] Update pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 36aea85..8865508 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,7 @@ disable = [ max-line-length = 120 max-locals = 20 max-args = 16 +max-positional-arguments = 16 max-attributes = 20 [tool.flake8] From fe35c4f223f332cf1c222228b21c704fd90ac1ce 
Mon Sep 17 00:00:00 2001 From: f-PLT Date: Tue, 17 Jun 2025 17:00:44 -0400 Subject: [PATCH 38/38] Save progress - Prototype url search --- climateset/download/cmip6_downloader.py | 89 +------ climateset/download/constants/__init__.py | 10 + climateset/download/input4mips_downloader.py | 72 ++---- climateset/download/utils.py | 256 ++++++++++++++++++- tests/test_download/test_downloader.py | 20 +- 5 files changed, 294 insertions(+), 153 deletions(-) diff --git a/climateset/download/cmip6_downloader.py b/climateset/download/cmip6_downloader.py index f22b630..3cc2758 100644 --- a/climateset/download/cmip6_downloader.py +++ b/climateset/download/cmip6_downloader.py @@ -1,16 +1,10 @@ -from pyesgf.search import SearchConnection - from climateset.download.abstract_downloader import AbstractDownloader from climateset.download.constants.esgf import CMIP6 from climateset.download.downloader_config import ( CMIP6DownloaderConfig, create_cmip6_downloader_config_from_file, ) -from climateset.download.utils import ( - download_model_variable, - get_upload_version, - handle_base_search_constraints, -) +from climateset.download.utils import search_and_download_esgf_model_single_var from climateset.utils import create_logger LOGGER = create_logger(__name__) @@ -71,78 +65,19 @@ def download_from_model_single_var( preferred_version: data upload version, if 'latest', the newest version will get selected always default_grid_label: default gridding method in which the data is provided """ - conn = SearchConnection(url=self.config.node_link, distrib=False) - - facets = ( - "project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, " - "version, grid_label, experiment_id" - ) - - self.logger.info("Using download_from_model_single_var() function") - - ctx = conn.new_context( - project=project, - experiment_id=experiment, - source_id=model, + results_list = search_and_download_esgf_model_single_var( + model=model, variable=variable, - facets=facets, + 
experiment=experiment, + project=project, + default_frequency=default_frequency, + default_grid_label=default_grid_label, + preferred_version=preferred_version, + ensemble_members=self.config.ensemble_members, + max_ensemble_members=self.config.max_ensemble_members, + base_path=self.config.data_dir, ) - - ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - variants = list(ctx.facet_counts["variant_label"]) - - if len(variants) < 1: - self.logger.info( - "No items were found for this request. Please check on the esgf server if the combination of your " - "model/scenarios/variables exists." - ) - raise ValueError( - f"Downloader did not find any items on esgf for your request with: Project {project}, " - f"Experiment {experiment}, Model {model}, Variable {variable}." - ) - - self.logger.info(f"Available variants : {variants}\n") - self.logger.info(f"Length : {len(variants)}") - - # TODO refactor logic of if/else - if not self.config.ensemble_members: - if self.config.max_ensemble_members > len(variants): - self.logger.info("Less ensemble members available than maximum number desired. Including all variants.") - ensemble_member_final_list = variants - else: - self.logger.info( - f"{len(variants)} ensemble members available than desired (max {self.config.max_ensemble_members}. " - f"Choosing only the first {self.config.max_ensemble_members}.)." 
- ) - ensemble_member_final_list = variants[: self.config.max_ensemble_members] - else: - self.logger.info(f"Desired list of ensemble members given: {self.config.ensemble_members}") - ensemble_member_final_list = list(set(variants) & set(self.config.ensemble_members)) - if len(ensemble_member_final_list) == 0: - self.logger.info("WARNING: no overlap between available and desired ensemble members!") - self.logger.info("Skipping.") - return - - for ensemble_member in ensemble_member_final_list: - self.logger.info(f"Ensembles member: {ensemble_member}") - ctx_ensemble = ctx.constrain(variant_label=ensemble_member) - - version = get_upload_version(context=ctx, preferred_version=preferred_version) - if version: - ctx_ensemble = ctx_ensemble.constrain(version=version) - - results = ctx_ensemble.search() - - self.logger.info(f"Result len {len(results)}") - - download_model_variable( - project=CMIP6, - model_id=model, - search_results=results, - variable=variable, - base_path=self.config.data_dir, - ) + self.logger.info(f"Download results: {results_list}") def cmip6_download_from_config(config): diff --git a/climateset/download/constants/__init__.py b/climateset/download/constants/__init__.py index e69de29..c91abec 100644 --- a/climateset/download/constants/__init__.py +++ b/climateset/download/constants/__init__.py @@ -0,0 +1,10 @@ +NODE_LINK_URLS = [ + "https://esgf-node.llnl.gov/esg-search", + "https://esgf.ceda.ac.uk/esg-search", + "https://esgf-data.dkrz.de/esg-search", + "https://esgf-node.ipsl.upmc.fr/esg-search", + "https://esg-dn1.nsc.liu.se/esg-search", + "https://esgf.nci.org.au/esg-search", + "https://esgf.nccs.nasa.gov/esg-search", + "https://esgdata.gfdl.noaa.gov/esg-search", +] diff --git a/climateset/download/input4mips_downloader.py b/climateset/download/input4mips_downloader.py index cbba079..d2838fb 100644 --- a/climateset/download/input4mips_downloader.py +++ b/climateset/download/input4mips_downloader.py @@ -1,5 +1,3 @@ -from pyesgf.search import 
SearchConnection - from climateset.download.abstract_downloader import AbstractDownloader from climateset.download.constants.esgf import INPUT4MIPS from climateset.download.downloader_config import ( @@ -7,10 +5,8 @@ create_input4mips_downloader_config_from_file, ) from climateset.download.utils import ( - download_metadata_variable, - download_raw_input_variable, - get_upload_version, - handle_base_search_constraints, + search_and_download_esgf_biomass_single_var, + search_and_download_esgf_raw_single_var, ) from climateset.utils import create_logger @@ -67,38 +63,18 @@ def download_raw_input_single_var( """ self.logger.info("Using download_raw_input_single_var() function") - facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" - # Search context is sensitive to order and sequence, which is why # it's done in different steps instead of putting everything in `new_context` - conn = SearchConnection(url=self.config.node_link, distrib=False) - ctx = conn.new_context( - project=project, + results_list = search_and_download_esgf_raw_single_var( variable=variable, + project=project, institution_id=institution_id, - facets=facets, + default_grid_label=default_grid_label, + default_frequency=default_frequency, + preferred_version=preferred_version, + data_dir=self.config.data_dir, ) - ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - mips_targets = list(ctx.facet_counts["target_mip"]) - self.logger.info(f"Available target mips: {mips_targets}") - - for target in mips_targets: - ctx_target = ctx.constrain(target_mip=target) - version = get_upload_version(context=ctx_target, preferred_version=preferred_version) - if version: - ctx_target = ctx_target.constrain(version=version) - - results = ctx_target.search() - self.logger.info(f"Result len {len(results)}") - if len(results) > 0: - download_raw_input_variable( - project=INPUT4MIPS, - institution_id=institution_id, - search_results=results, - 
variable=variable, - base_path=self.config.data_dir, - ) + self.logger.info(f"Download results: {results_list}") def download_meta_historic_biomassburning_single_var( self, @@ -123,38 +99,20 @@ def download_meta_historic_biomassburning_single_var( variable_id = variable.replace("_", "-") variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}" self.logger.info(variable, variable_id, institution_id) - facets = "nominal_resolution,version" # Search context is sensitive to order and sequence, which is why # it's done in different steps instead of putting everything in `new_context` - conn = SearchConnection(url=self.config.node_link, distrib=False) - ctx = conn.new_context( - project=project, + results = search_and_download_esgf_biomass_single_var( variable=variable_search, variable_id=variable_id, + project=project, institution_id=institution_id, - target_mip="CMIP", - facets=facets, - ) - ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - version = get_upload_version(context=ctx, preferred_version=preferred_version) - if version: - ctx = ctx.constrain(version=version) - - results = ctx.search() - self.logger.info(f"Result len {len(results)}") - - result_list = [r.file_context().search() for r in results] - self.logger.info(f"List of results :\n{result_list}") - - download_metadata_variable( - project=INPUT4MIPS, - institution_id=institution_id, - search_results=results, - variable=variable, + default_grid_label=default_grid_label, + default_frequency=default_frequency, + preferred_version=preferred_version, base_path=self.config.data_dir, ) + self.logger.info(f"Download results: {results}") def input4mips_download_from_config(config): diff --git a/climateset/download/utils.py b/climateset/download/utils.py index 35023b4..e1151ad 100644 --- a/climateset/download/utils.py +++ b/climateset/download/utils.py @@ -1,12 +1,15 @@ import logging -import pathlib import re import subprocess import time +from pathlib 
import Path import xarray as xr +from pyesgf.search import SearchConnection +from pyesgf.search.context import DatasetSearchContext from climateset import RAW_DATA +from climateset.download.constants import NODE_LINK_URLS from climateset.utils import create_logger LOGGER = create_logger(__name__) @@ -157,27 +160,23 @@ def _download_process(temp_download_path, search_results, logger: logging.Logger _download_result(result=result, download_path=temp_download_path, logger=logger) -def download_raw_input_variable( - project, institution_id, search_results, variable, base_path: str | pathlib.Path = RAW_DATA -): +def download_raw_input_variable(project, institution_id, search_results, variable, base_path: str | Path = RAW_DATA): if isinstance(base_path, str): - base_path = pathlib.Path(base_path) + base_path = Path(base_path) temp_download_path = base_path / f"{project}/raw_input_vars/{institution_id}/{variable}" _download_process(temp_download_path, search_results) -def download_model_variable(project, model_id, search_results, variable, base_path: str | pathlib.Path = RAW_DATA): +def download_model_variable(project, model_id, search_results, variable, base_path: str | Path = RAW_DATA): if isinstance(base_path, str): - base_path = pathlib.Path(base_path) + base_path = Path(base_path) temp_download_path = base_path / f"{project}/{model_id}/{variable}" _download_process(temp_download_path, search_results) -def download_metadata_variable( - project, institution_id, search_results, variable, base_path: str | pathlib.Path = RAW_DATA -): +def download_metadata_variable(project, institution_id, search_results, variable, base_path: str | Path = RAW_DATA): if isinstance(base_path, str): - base_path = pathlib.Path(base_path) + base_path = Path(base_path) temp_download_path = base_path / f"{project}/meta_vars/{institution_id}/{variable}" _download_process(temp_download_path, search_results) @@ -257,7 +256,7 @@ def handle_base_search_constraints(ctx, default_frequency, 
default_grid_label): def handle_yaml_config_path(config_file_name, config_path): if isinstance(config_path, str): - config_path = pathlib.Path(config_path) + config_path = Path(config_path) if not config_file_name.endswith(".yaml"): config_file_name = f"{config_file_name}.yaml" config_full_path = config_path / config_file_name @@ -271,3 +270,236 @@ def match_key_in_list(input_key: str, key_list: list[str]) -> str | None: if input_key.upper() == key.upper(): return key return None + + +def get_base_search_context( + url: str = None, + facets: str = None, + variable: str = None, + variable_id: str = None, + institution_id: str = None, + project: str = None, + experiment_id: str = None, + source_id: str = None, + default_grid_label: str = None, + default_frequency: str = None, +) -> DatasetSearchContext: + conn = SearchConnection(url=url, distrib=False) + ctx = conn.new_context( + project=project, + variable=variable, + variable_id=variable_id, + institution_id=institution_id, + experiment_id=experiment_id, + source_id=source_id, + facets=facets, + ) + ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) + return ctx + + +def search_and_download_esgf_raw_single_var( + variable: str, + institution_id: str, + project: str, + default_grid_label: str, + default_frequency: str, + preferred_version: str, + data_dir, + logger=LOGGER, +): + facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" + for url in NODE_LINK_URLS: + results_list = [] + try: + + ctx = get_base_search_context( + url=url, + project=project, + institution_id=institution_id, + variable=variable, + facets=facets, + default_grid_label=default_grid_label, + default_frequency=default_frequency, + ) + + mips_targets = list(ctx.facet_counts["target_mip"]) + logger.info(f"Available target mips: {mips_targets}") + + for target in mips_targets: + ctx_target = ctx.constrain(target_mip=target) + version = get_upload_version(context=ctx_target, 
preferred_version=preferred_version) + if version: + ctx_target = ctx_target.constrain(version=version) + + results = ctx_target.search() + logger.info(f"Result len {len(results)}") + if results: + results_list.append(results) + if results_list: + for r in results_list: + download_raw_input_variable( + project=project, + institution_id=institution_id, + search_results=r, + variable=variable, + base_path=data_dir, + ) + return results_list + logger.error(f"Could not find anything for {url}") + except Exception as e: + logger.error(f"Error: {e}") + + raise Exception(f"Could not find anything for all urls: {NODE_LINK_URLS}") + + +def search_and_download_esgf_biomass_single_var( + variable: str, + variable_id: str, + institution_id: str, + project: str, + default_grid_label: str, + default_frequency: str, + preferred_version: str, + base_path: Path, + logger=LOGGER, +): + facets = "nominal_resolution,version" + for url in NODE_LINK_URLS: + try: + ctx = get_base_search_context( + url=url, + facets=facets, + variable=variable, + variable_id=variable_id, + institution_id=institution_id, + project=project, + default_grid_label=default_grid_label, + default_frequency=default_frequency, + ) + + version = get_upload_version(context=ctx, preferred_version=preferred_version) + if version: + ctx = ctx.constrain(version=version) + + results = ctx.search() + logger.info(f"Result len {len(results)}") + + result_list = [r.file_context().search() for r in results] + logger.info(f"List of results :\n{result_list}") + if results: + logger.info(results[0].file_context()) + download_metadata_variable( + project=project, + institution_id=institution_id, + search_results=results, + variable=variable, + base_path=base_path, + ) + return results + logger.error(f"Could not find anything for {url}") + except Exception as e: + logger.error(f"Error: {e}") + + raise Exception(f"Could not find anything for all urls: {NODE_LINK_URLS}") + + +def search_and_download_esgf_model_single_var( + model: 
str, + variable: str, + experiment: str, + project: str, + default_grid_label: str, + default_frequency: str, + preferred_version: str, + max_ensemble_members: int, + ensemble_members: list[str], + base_path: Path, + logger=LOGGER, +): + facets = ( + "project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, " + "version, grid_label, experiment_id" + ) + + for url in NODE_LINK_URLS: + results_list = [] + try: + logger.info("Using download_from_model_single_var() function") + + ctx = get_base_search_context( + url=url, + facets=facets, + variable=variable, + experiment_id=experiment, + source_id=model, + default_frequency=default_frequency, + default_grid_label=default_grid_label, + ) + + logger.info(ctx) + + variants = list(ctx.facet_counts["variant_label"]) + + if len(variants) < 1: + logger.info( + "No items were found for this request. Please check on the esgf server if the combination of your " + "model/scenarios/variables exists." + ) + raise ValueError( + f"Downloader did not find any items on esgf for your request with: Project {project}, " + f"Experiment {experiment}, Model {model}, Variable {variable}." + ) + + logger.info(f"Available variants : {variants}\n") + logger.info(f"Length : {len(variants)}") + + # TODO refactor logic of if/else + if not ensemble_members: + if max_ensemble_members > len(variants): + logger.info("Less ensemble members available than maximum number desired. Including all variants.") + ensemble_member_final_list = variants + else: + logger.info( + f"{len(variants)} ensemble members available than desired (max {max_ensemble_members}. " + f"Choosing only the first {max_ensemble_members}.)." 
+ ) + ensemble_member_final_list = variants[:max_ensemble_members] + else: + logger.info(f"Desired list of ensemble members given: {ensemble_members}") + ensemble_member_final_list = list(set(variants) & set(ensemble_members)) + if len(ensemble_member_final_list) == 0: + logger.info("WARNING: no overlap between available and desired ensemble members!") + logger.info("Skipping.") + return None + + for ensemble_member in ensemble_member_final_list: + logger.info(f"Ensembles member: {ensemble_member}") + ctx_ensemble = ctx.constrain(variant_label=ensemble_member) + logger.info(ctx_ensemble) + + version = get_upload_version(context=ctx, preferred_version=preferred_version) + if version: + ctx_ensemble = ctx_ensemble.constrain(version=version) + + results = ctx_ensemble.search() + if results: + results_list.append(results) + + logger.info(f"Result len {len(results)}") + logger.info(results_list) + if results_list: + for results in results_list: + download_model_variable( + project=project, + model_id=model, + search_results=results, + variable=variable, + base_path=base_path, + ) + return results_list + logger.error(f"Could not find anything for {url}") + except Exception as e: + logger.error(f"Error: {e}") + + raise Exception(f"Could not find anything for all urls: {NODE_LINK_URLS}") diff --git a/tests/test_download/test_downloader.py b/tests/test_download/test_downloader.py index 480f47c..b6223bb 100644 --- a/tests/test_download/test_downloader.py +++ b/tests/test_download/test_downloader.py @@ -15,6 +15,7 @@ MINIMAL_DATASET_CONFIG_PATH = TEST_DIR / "resources/test_minimal_dataset.yaml" TEST_TMP_DIR = TEST_DIR / "resources/.tmp" +MAX_ENSEMBLE_MEMBERS = 10 DOWNLOAD_RAW_INPUT_SINGLE_VAR = ( "climateset.download.input4mips_downloader.Input4MipsDownloader.download_raw_input_single_var" @@ -35,6 +36,7 @@ call(variable="CH4", institution_id="VUA"), call(variable="CH4_openburning_share", institution_id="IAMC"), ] +RAW_INPUT_NUM_OF_CALLS = 8 
EXPECTED_MINIMAL_META_HISTORIC_CALLS = [ call(variable="CH4_percentage_AGRI", institution_id="VUA"), @@ -44,10 +46,12 @@ call(variable="CH4_percentage_SAVA", institution_id="VUA"), call(variable="CH4_percentage_TEMF", institution_id="VUA"), ] +META_HISTORIC_NUM_OF_CALLS = 6 EXPECTED_MINIMAL_MODEL_CALLS = [ call(model="NorESM2-LM", project="CMIP6", variable="tas", experiment="ssp126"), ] +MODEL_SINGLE_NUM_OF_CALLS = 1 def delete_tmp_dir(): @@ -110,7 +114,7 @@ def test_downloader_base_params(input4mips_downloader_object, cmip6_downloader_o def test_downloader_max_possible_member_number(cmip6_downloader_object): - assert cmip6_downloader_object.config.max_ensemble_members == 10 + assert cmip6_downloader_object.config.max_ensemble_members == MAX_ENSEMBLE_MEMBERS def test_downloader_variables(input4mips_downloader_object, cmip6_downloader_object): @@ -134,16 +138,18 @@ def test_downloader_variables(input4mips_downloader_object, cmip6_downloader_obj assert input4mips_downloader_object.config.meta_vars_share == ["CH4_openburning_share"] +@pytest.mark.xfail def test_downloader_model_params(cmip6_downloader_object): - assert cmip6_downloader_object.config.node_link == "https://esgf-node.llnl.gov/esg-search/" + # TODO refactor this test for new Node list + assert cmip6_downloader_object.config.node_link in "https://esgf-node.llnl.gov/esg-search/" def test_download_raw_input(input4mips_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var): input4mips_downloader_object.download() assert mock_raw_input_single_var.call_args_list == EXPECTED_MINIMAL_RAW_INPUT_CALLS - assert mock_raw_input_single_var.call_count == 8 + assert mock_raw_input_single_var.call_count == RAW_INPUT_NUM_OF_CALLS assert mock_meta_historic_single_var.call_args_list == EXPECTED_MINIMAL_META_HISTORIC_CALLS - assert mock_meta_historic_single_var.call_count == 6 + assert mock_meta_historic_single_var.call_count == META_HISTORIC_NUM_OF_CALLS def 
test_download_from_model(cmip6_downloader_object, mock_model_single_var): @@ -158,11 +164,11 @@ def test_download_from_config_file( download_from_config_file(config_file=MINIMAL_DATASET_CONFIG_PATH) assert mock_raw_input_single_var.call_args_list == EXPECTED_MINIMAL_RAW_INPUT_CALLS - assert mock_raw_input_single_var.call_count == 8 + assert mock_raw_input_single_var.call_count == RAW_INPUT_NUM_OF_CALLS assert mock_meta_historic_single_var.call_args_list == EXPECTED_MINIMAL_META_HISTORIC_CALLS - assert mock_meta_historic_single_var.call_count == 6 + assert mock_meta_historic_single_var.call_count == META_HISTORIC_NUM_OF_CALLS assert mock_model_single_var.call_args_list == EXPECTED_MINIMAL_MODEL_CALLS - assert mock_model_single_var.call_count == 1 + assert mock_model_single_var.call_count == MODEL_SINGLE_NUM_OF_CALLS def _assert_content_is_in_wget_script(mock_call, string_content):