diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 55cd09f..f83e30c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,8 +50,11 @@ jobs: if: steps.cache.outputs.cache-hit != 'true' - name: Install dependencies + env: + CONDA_TOOL: mamba run: | - make CONDA_TOOL=mamba install + make poetry-install-venv + make install - name: Run linting checks run: | diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml index 8b6a139..b3079ee 100644 --- a/.github/workflows/precommit.yml +++ b/.github/workflows/precommit.yml @@ -50,8 +50,11 @@ jobs: if: steps.cache.outputs.cache-hit != 'true' - name: Install dependencies + env: + CONDA_TOOL: mamba run: | - make CONDA_TOOL=mamba install + make poetry-install-venv + make install - name: Run Pre-commit checks run: | diff --git a/.make/base.make b/.make/base.make index 9342369..3f38651 100644 --- a/.make/base.make +++ b/.make/base.make @@ -3,7 +3,7 @@ # If necessary, override the corresponding variable and/or target, or create new ones # in one of the following files, depending on the nature of the override : # -# Makefile.variables, Makefile.targets or Makefile.private`, +# Makefile.variables, Makefile.targets or Makefile.private, # # The only valid reason to modify this file is to fix a bug or to add new # files to include. 
@@ -16,7 +16,7 @@ PROJECT_PATH := $(dir $(abspath $(firstword $(MAKEFILE_LIST)))) MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) SHELL := /usr/bin/env bash BUMP_TOOL := bump-my-version -MAKEFILE_VERSION := 0.2.0 +MAKEFILE_VERSION := 0.5.0 DOCKER_COMPOSE ?= docker compose AUTO_INSTALL ?= @@ -24,6 +24,12 @@ AUTO_INSTALL ?= # CONDA_TOOL can be overridden in Makefile.private file CONDA_TOOL := conda CONDA_ENVIRONMENT ?= +CONDA_YES_OPTION ?= + +# Default environment to install package +# Can be overridden in Makefile.private file +DEFAULT_INSTALL_ENV ?= +DEFAULT_POETRY_INSTALL_ENV ?= # Colors _SECTION := \033[1m\033[34m @@ -31,6 +37,11 @@ _TARGET := \033[36m _NORMAL := \033[0m .DEFAULT_GOAL := help + +# Project and Private variables and targets import to override variables for local +# This is to make sure, sometimes the Makefile includes don't work. +-include Makefile.variables +-include Makefile.private ## -- Informative targets ------------------------------------------------------------------------------------------- ## .PHONY: all @@ -69,167 +80,153 @@ targets: help version: ## display current version @echo "version: $(APP_VERSION)" -## -- Conda targets ------------------------------------------------------------------------------------------------- ## +## -- Virtualenv targets -------------------------------------------------------------------------------------------- ## -.PHONY: conda-install -conda-install: ## Install Conda on your local machine - @echo "Looking for [$(CONDA_TOOL)]..."; \ - $(CONDA_TOOL) --version; \ - if [ $$? 
!= "0" ]; then \ - echo " "; \ - echo "Your defined Conda tool [$(CONDA_TOOL)] has not been found."; \ - echo " "; \ - echo "If you know you already have [$(CONDA_TOOL)] or some other Conda tool installed,"; \ - echo "Check your [CONDA_TOOL] variable in the Makefile.private for typos."; \ - echo " "; \ - echo "If your conda tool has not been initiated through your .bashrc file,"; \ - echo "consider using the full path to its executable instead when"; \ - echo "defining your [CONDA_TOOL] variable"; \ - echo " "; \ - echo "If in doubt, don't install Conda and manually create and activate"; \ - echo "your own Python environment."; \ - echo " "; \ - echo -n "Would you like to install Miniconda ? [y/N]: "; \ - read ans; \ - case $$ans in \ - [Yy]*) \ - echo "Fetching and installing miniconda"; \ - echo " "; \ - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh; \ - bash ~/miniconda.sh -b -p $${HOME}/.conda; \ - export PATH=$${HOME}/.conda/bin:$$PATH; \ - conda init; \ - /usr/bin/rm ~/miniconda.sh; \ - ;; \ - *) \ - echo "Skipping installation."; \ - echo " "; \ - ;; \ - esac; \ - else \ - echo "Conda tool [$(CONDA_TOOL)] has been found, skipping installation"; \ - fi; +VENV_PATH := $(PROJECT_PATH).venv +VENV_ACTIVATE := $(VENV_PATH)/bin/activate -.PHONY: conda-create-env -conda-create-env: conda-install ## Create a local Conda environment based on `environment.yml` file - @$(CONDA_TOOL) env create -f environment.yml +.PHONY: venv-create +venv-create: ## Create a virtualenv '.venv' at the root of the project folder + @virtualenv $(VENV_PATH) + @make -s venv-activate -.PHONY: conda-env-info -conda-env-info: ## Print information about active Conda environment using - @$(CONDA_TOOL) info +.PHONY: venv-activate +venv-activate: ## Print out the shell command to activate the project's virtualenv. 
+ @echo "source $(VENV_ACTIVATE)" -.PHONY: _conda-poetry-install -_conda-poetry-install: - $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) $(CONDA_TOOL) install -c conda-forge poetry; \ - CURRENT_VERSION=$$(poetry --version | awk '{print $$NF}' | tr -d ')'); \ - REQUIRED_VERSION="1.6.0"; \ - if [ "$$(printf '%s\n' "$$REQUIRED_VERSION" "$$CURRENT_VERSION" | sort -V | head -n1)" != "$$REQUIRED_VERSION" ]; then \ - echo "Poetry installed version $$CURRENT_VERSION is less than minimal version $$REQUIRED_VERSION, fixing urllib3 version to prevent problems"; \ - poetry add "urllib3<2.0.0"; \ - fi; +## -- Poetry targets ------------------------------------------------------------------------------------------------ ## -.PHONY:conda-poetry-install -conda-poetry-install: ## Install Poetry in currently active Conda environment. Will fail if Conda is not found +.PHONY: poetry-install-auto +poetry-install-auto: ## Install Poetry in Conda environment, or with pipx in a virtualenv if Conda not found @poetry --version; \ if [ $$? != "0" ]; then \ echo "Poetry not found, proceeding to install Poetry..."; \ - echo "Looking for [$(CONDA_TOOL)]...";\ - $(CONDA_TOOL) --version; \ - if [ $$? != "0" ]; then \ - echo "$(CONDA_TOOL) not found; Poetry will not be installed"; \ - else \ - echo "Installing Poetry with Conda in [$(CONDA_ENVIRONMENT)] environment"; \ - make -s _conda-poetry-install; \ + if [ "$(DEFAULT_POETRY_INSTALL_ENV)" == "conda" ]; then \ + ans_where="conda"; \ + elif [ "$(DEFAULT_POETRY_INSTALL_ENV)" == "venv" ]; then \ + ans_where="venv"; \ + else\ + echo -n "Where would you like to install Poetry, in a dedicated virtualenv (venv), or a conda environment? 
[venv/conda]: "; \ + read ans_where; \ fi; \ + case $$ans_where in \ + "venv" | "Venv" |"VENV") \ + make AUTO_INSTALL=true -s poetry-install-venv; \ + ;; \ + "conda" | "Conda" | "CONDA") \ + echo "Installing poetry with Conda"; \ + make AUTO_INSTALL=true -s conda-poetry-install; \ + ;; \ + *) \ + echo ""; \ + echo -e "\e[1;39;41m-- WARNING --\e[0m Option $$ans_where not found, exiting process."; \ + echo ""; \ + exit 1; \ + esac; \ fi; -.PHONY: conda-poetry-uninstall -conda-poetry-uninstall: ## Uninstall Poetry located in currently active Conda environment - $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) $(CONDA_TOOL) remove poetry - -.PHONY: conda-clean-env -conda-clean-env: ## Completely removes local project's Conda environment - $(CONDA_TOOL) env remove -n $(CONDA_ENVIRONMENT) +.PHONY: _pipx_install_poetry +_pipx_install_poetry: + @output="$$(pip install poetry --dry-run)"; \ + if echo "$$output" | grep -q computecanada ; then \ + echo ""; \ + echo -e "\e[1;39;41m-- WARNING --\e[0m Compute Canada (DRAC) environment detected: Installing Poetry < 2.0.0"; \ + echo ""; \ + pipx install 'poetry<2.0.0' ; \ + else \ + pipx install poetry ; \ + fi; -## -- Poetry targets ------------------------------------------------------------------------------------------------ ## -.PHONY: poetry-install-auto -poetry-install-auto: ## Install Poetry in activated Conda environment, or with pipx if Conda not found - @poetry --version; \ - if [ $$? != "0" ]; then \ - echo "Poetry not found, proceeding to install Poetry..."; \ - echo "Looking for [$(CONDA_TOOL)]...";\ - $(CONDA_TOOL) --version; \ - if [ $$? != "0" ]; then \ - echo "$(CONDA_TOOL) not found, trying with pipx"; \ - pipx --version; \ + +.PHONY: poetry-install +poetry-install: ## Install standalone Poetry using pipx. Will ask where to install pipx. + @echo "Looking for Poetry version...";\ + poetry --version; \ + if [ $$? 
!= "0" ]; then \ + if [ "$(AUTO_INSTALL)" = "true" ]; then \ + ans="y";\ + else \ + echo "Poetry not found..."; \ + echo "Looking for pipx version...";\ + pipx_found=0; \ + pipx --version; \ if [ $$? != "0" ]; then \ - echo "pipx not found; installing pipx"; \ - pip install --user pipx; \ - pipx ensurepath; \ + pipx_found=1; \ + echo "pipx not found..."; \ + echo""; \ + echo -n "Would you like to install pipx and Poetry? [y/N]: "; \ + else \ + echo""; \ + echo -n "Would you like to install Poetry using pipx? [y/N]: "; \ fi; \ - pipx install poetry; \ + read ans; \ + fi; \ + case $$ans in \ + [Yy]*) \ + if [ "$$pipx_found" == "1" ]; then \ + echo""; \ + echo -e "\e[1;39;41m-- WARNING --\e[0m The following pip has been found and will be used to install pipx: "; \ + echo " -> "$$(which pip); \ + echo""; \ + echo "If you do not have write permission to that environment, using it to install pipx will fail."; \ + echo "If this is the case, you should install pipx using a virtual one."; \ + echo""; \ + echo "See documentation for more information."; \ + echo""; \ + echo -n "Would you like to use the local available pip above, or create virtual environment to install pipx? [local/virtual]: "; \ + read ans_how; \ + case $$ans_how in \ + "LOCAL" | "Local" |"local") \ + make -s poetry-install-local; \ + ;; \ + "VIRTUAL" | "Virtual" | "virtual") \ + make -s poetry-install-venv; \ + ;; \ + *) \ + echo ""; \ + echo -e "\e[1;39;41m-- WARNING --\e[0m Option $$ans_how not found, exiting process."; \ + echo ""; \ + exit 1; \ + esac; \ else \ - echo "Installing poetry with Conda"; \ - make -s _conda-poetry-install; \ + echo "Installing Poetry"; \ + make -s _pipx_install_poetry; \ fi; \ - fi; - -.PHONY: poetry-install -poetry-install: ## Install standalone Poetry using pipx and create Poetry env. Will install pipx if not found - @echo "Looking for Poetry version...";\ - poetry --version; \ - if [ $$? 
!= "0" ]; then \ - if [ "$(AUTO_INSTALL)" = "true" ]; then \ - ans="y";\ - else \ - echo "Looking for pipx version...";\ - pipx --version; \ - if [ $$? != "0" ]; then \ - echo""; \ - echo -e "\e[1;39;41m-- WARNING --\e[0m The following pip has been found and will be used to install pipx: "; \ - echo " -> "$$(which pip); \ - echo""; \ - echo "If you do not have write permission to that environment, you will need to either activate"; \ - echo "a different environment, or create a virtual one (ex. venv) to install pipx into it."; \ - echo "See documentation for more information."; \ - echo""; \ - echo "Alternatively, the [make poetry-install-venv] target can also be used"; \ - echo""; \ - echo -n "Would you like to install pipx and Poetry? [y/N]: "; \ - else \ - echo""; \ - echo -n "Would you like to install Poetry using pipx? [y/N]: "; \ - fi; \ - read ans; \ - fi; \ - case $$ans in \ - [Yy]*) \ - pipx --version; \ - if [ $$? != "0" ]; then \ - echo "pipx not found; installing pipx"; \ - pip install --user pipx || pip install pipx; \ - pipx ensurepath; \ - fi; \ - echo "Installing Poetry"; \ - pipx install poetry; \ - make -s poetry-create-env; \ - ;; \ - *) \ - echo "Skipping installation."; \ - echo " "; \ - ;; \ - esac; \ - fi; + ;; \ + *) \ + echo "Skipping installation."; \ + echo " "; \ + ;; \ + esac; \ + fi; +PIPX_VENV_PATH := $$HOME/.pipx_venv .PHONY: poetry-install-venv -poetry-install-venv: ## Install standalone Poetry and Poetry environment. Will install pipx in $HOME/.pipx_venv - @echo "Creating virtual environment using venv here : [$$HOME/.pipx_venv]" - @python3 -m venv $$HOME/.pipx_venv - @echo "Activating virtual environment [$$HOME/.pipx_venv]" - @source $$HOME/.pipx_venv/bin/activate - @pip3 install pipx - @make -s poetry-install +poetry-install-venv: ## Install standalone Poetry. Will install pipx in $HOME/.pipx_venv + @pipx --version; \ + if [ $$? 
!= "0" ]; then \ + echo "Creating virtual environment using venv here : [$(PIPX_VENV_PATH)]"; \ + python3 -m venv $(PIPX_VENV_PATH); \ + echo "Activating virtual environment [$(PIPX_VENV_PATH)]"; \ + source $(PIPX_VENV_PATH)/bin/activate; \ + pip3 install pipx; \ + pipx ensurepath; \ + source $(PIPX_VENV_PATH)/bin/activate && make -s _pipx_install_poetry ; \ + else \ + make -s _pipx_install_poetry ; \ + fi; + +.PHONY: poetry-install-local +poetry-install-local: ## Install standalone Poetry. Will install pipx with locally available pip. + @pipx --version; \ + if [ $$? != "0" ]; then \ + echo "pipx not found; installing pipx"; \ + pip3 install pipx; \ + pipx ensurepath; \ + fi; + @echo "Installing Poetry" + @make -s _pipx_install_poetry + .PHONY: poetry-env-info poetry-env-info: ## Information about the currently active environment used by Poetry @@ -247,6 +244,10 @@ poetry-create-env: ## Create a Poetry managed environment for the project (Outsi @echo "Use and for more information" @echo"" +.PHONY: poetry-activate +poetry-activate: ## Print the shell command to activate the project's poetry env. + @poetry env activate + .PHONY: poetry-remove-env poetry-remove-env: ## Remove current project's Poetry managed environment. @if [ "$(AUTO_INSTALL)" = "true" ]; then \ @@ -255,6 +256,7 @@ poetry-remove-env: ## Remove current project's Poetry managed environment. env_name=$$(basename $$env_path); \ else \ echo""; \ + echo "Looking for poetry environments..."; \ env_path=$$(poetry env info -p); \ if [[ "$$env_path" != "" ]]; then \ echo "The following environment has been found for this project: "; \ @@ -266,11 +268,15 @@ poetry-remove-env: ## Remove current project's Poetry managed environment. 
echo "If the active environment listed above is a Conda environment,"; \ echo "Choosing to delete it will have no effect; use the target "; \ echo""; \ + echo""; \ + echo "If the active environment listed above is a venv environment,"; \ + echo "Choosing to delete it will have no effect; use the bash command $ rm -rf "; \ + echo""; \ echo -n "Would you like delete the environment listed above? [y/N]: "; \ read ans_env; \ else \ - env_name="None"; \ - env_path="None"; \ + env_name="None"; \ + env_path="None"; \ fi; \ fi; \ if [[ $$env_name != "None" ]]; then \ @@ -282,6 +288,8 @@ poetry-remove-env: ## Remove current project's Poetry managed environment. echo "No environment was found/provided - skipping environment deletion"; \ ;;\ esac; \ + else \ + echo "No environments were found... skipping environment deletion"; \ fi; \ .PHONY: poetry-uninstall @@ -295,7 +303,15 @@ poetry-uninstall: poetry-remove-env ## Uninstall pipx-installed Poetry and the c fi; \ case $$ans in \ [Yy]*) \ - pipx uninstall poetry; \ + pipx --version ; \ + if [ $$? != "0" ]; then \ + echo "" ; \ + echo "Pipx not found globally, trying with $(PIPX_VENV_PATH) env" ;\ + echo "" ; \ + source $(PIPX_VENV_PATH)/bin/activate && pipx uninstall poetry ; \ + else \ + pipx uninstall poetry ; \ + fi; \ ;; \ *) \ echo "Skipping uninstallation."; \ @@ -324,53 +340,253 @@ poetry-uninstall-pipx: poetry-remove-env ## Uninstall pipx-installed Poetry, the esac; \ .PHONY: poetry-uninstall-venv -poetry-uninstall-venv: ## Uninstall pipx-installed Poetry, the created Poetry environment, pipx and $HOME/.pipx_venv - @python3 -m venv $$HOME/.pipx_venv - @source $$HOME/.pipx_venv/bin/activate - @make -s poetry-uninstall-pipx +poetry-uninstall-venv: poetry-remove-env ## Uninstall pipx-installed Poetry, the created Poetry environment, pipx and $HOME/.pipx_venv + @if [ "$(AUTO_INSTALL)" = "true" ]; then \ + ans="y";\ + else \ + echo""; \ + echo -n "Would you like to uninstall pipx-installed Poetry and pipx? 
[y/N]: "; \ + read ans; \ + fi; \ + case $$ans in \ + [Yy]*) \ + (source $(PIPX_VENV_PATH)/bin/activate && pipx uninstall poetry); \ + (source $(PIPX_VENV_PATH)/bin/activate && pip uninstall -y pipx); \ + ;; \ + *) \ + echo "Skipping uninstallation."; \ + echo " "; \ + ;; \ + esac; \ + @if [ "$(AUTO_INSTALL)" = "true" ]; then \ ans="y";\ else \ echo""; \ - echo -n "Would you like to remove the virtual environment located here : [$$HOME/.pipx_venv] ? [y/N]: "; \ + echo -n "Would you like to remove the virtual environment located here : [$(PIPX_VENV_PATH)] ? [y/N]: "; \ read ans; \ fi; \ case $$ans in \ [Yy]*) \ - rm -r $$HOME/.pipx_venv; \ + rm -r $(PIPX_VENV_PATH); \ ;; \ *) \ - echo "Skipping [$$HOME/.pipx_venv] virtual environment removal."; \ + echo "Skipping [$(PIPX_VENV_PATH)] virtual environment removal."; \ echo ""; \ ;; \ esac; \ -## -- Install targets (All install targets will install Poetry if not found using `make poetry-install-auto`)-------- ## +## -- Conda targets ------------------------------------------------------------------------------------------------- ## + +.PHONY: conda-install +conda-install: ## Install Conda on your local machine + @echo "Looking for [$(CONDA_TOOL)]..."; \ + $(CONDA_TOOL) --version; \ + if [ $$? != "0" ]; then \ + echo " "; \ + echo "Your defined Conda tool [$(CONDA_TOOL)] has not been found."; \ + echo " "; \ + echo "If you know you already have [$(CONDA_TOOL)] or some other Conda tool installed,"; \ + echo "Check your [CONDA_TOOL] variable in the Makefile.private for typos."; \ + echo " "; \ + echo "If your conda tool has not been initiated through your .bashrc file,"; \ + echo "consider using the full path to its executable instead when"; \ + echo "defining your [CONDA_TOOL] variable"; \ + echo " "; \ + echo "If in doubt, don't install Conda and manually create and activate"; \ + echo "your own Python environment."; \ + echo " "; \ + echo -n "Would you like to install Miniconda ? 
[y/N]: "; \ + read ans; \ + case $$ans in \ + [Yy]*) \ + echo "Fetching and installing miniconda"; \ + echo " "; \ + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh; \ + bash ~/miniconda.sh -b -p $${HOME}/.conda; \ + export PATH=$${HOME}/.conda/bin:$$PATH; \ + conda init; \ + /usr/bin/rm ~/miniconda.sh; \ + ;; \ + *) \ + echo "Skipping installation."; \ + echo " "; \ + ;; \ + esac; \ + else \ + echo "Conda tool [$(CONDA_TOOL)] has been found, skipping installation"; \ + fi; + +.PHONY: conda-create-env +conda-create-env: conda-install ## Create a local Conda environment based on 'environment.yml' file + @$(CONDA_TOOL) env create $(CONDA_YES_OPTION) -f environment.yml + +.PHONY: conda-env-info +conda-env-info: ## Print information about active Conda environment using + @$(CONDA_TOOL) info + +.PHONY: conda-activate +conda-activate: ## Print the shell command to activate the project's Conda env. + @echo "$(CONDA_TOOL) activate $(CONDA_ENVIRONMENT)" + +.PHONY: _conda-poetry-install +_conda-poetry-install: + @$(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) python --version; \ + if [ $$? != "0" ]; then \ + echo "Target environment doesn't seem to exist..."; \ + if [ "$(AUTO_INSTALL)" = "true" ]; then \ + ans="y";\ + else \ + echo ""; \ + echo -n "Do you want to create it? 
[y/N] "; \ + read ans; \ + fi; \ + case $$ans in \ + [Yy]*) \ + echo "Creating conda environment : [$(CONDA_ENVIRONMENT)]"; \ + make -s conda-create-env; \ + ;; \ + *) \ + echo "Exiting..."; \ + exit 1;\ + ;; \ + esac;\ + fi; + $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) $(CONDA_TOOL) install $(CONDA_YES_OPTION) -c conda-forge poetry; \ + CURRENT_VERSION=$$($(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) poetry --version | awk '{print $$NF}' | tr -d ')'); \ + REQUIRED_VERSION="1.6.0"; \ + if [ "$$(printf '%s\n' "$$REQUIRED_VERSION" "$$CURRENT_VERSION" | sort -V | head -n1)" != "$$REQUIRED_VERSION" ]; then \ + echo "Poetry installed version $$CURRENT_VERSION is less than minimal version $$REQUIRED_VERSION, fixing urllib3 version to prevent problems"; \ + $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) poetry add "urllib3<2.0.0"; \ + fi; + +.PHONY:conda-poetry-install +conda-poetry-install: ## Install Poetry in the project's Conda environment. Will fail if Conda is not found + @poetry --version; \ + if [ $$? != "0" ]; then \ + echo "Poetry not found, proceeding to install Poetry..."; \ + echo "Looking for [$(CONDA_TOOL)]...";\ + $(CONDA_TOOL) --version; \ + if [ $$? != "0" ]; then \ + echo "$(CONDA_TOOL) not found; Poetry will not be installed"; \ + else \ + echo "Installing Poetry with Conda in [$(CONDA_ENVIRONMENT)] environment"; \ + make -s _conda-poetry-install; \ + fi; \ + else \ + echo ""; \ + echo "Poetry has been found on this system :"; \ + echo " Install location: $$(which poetry)"; \ + echo ""; \ + if [ "$(AUTO_INSTALL)" = "true" ]; then \ + ans="y";\ + else \ + echo -n "Would you like to install poetry in the project's conda environment anyway ? 
[y/N]: "; \ + read ans; \ + fi; \ + case $$ans in \ + [Yy]*) \ + echo "Installing Poetry with Conda in [$(CONDA_ENVIRONMENT)] environment"; \ + make -s _conda-poetry-install; \ + ;; \ + *) \ + echo "Skipping installation."; \ + echo " "; \ + ;; \ + esac; \ + fi; + +.PHONY: conda-poetry-uninstall +conda-poetry-uninstall: ## Uninstall Poetry located in currently active Conda environment + $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) $(CONDA_TOOL) remove $(CONDA_YES_OPTION) poetry + +.PHONY: conda-clean-env +conda-clean-env: ## Completely removes local project's Conda environment + $(CONDA_TOOL) env remove $(CONDA_YES_OPTION) -n $(CONDA_ENVIRONMENT) + +## -- Install targets (All install targets will install Poetry if not found using 'make poetry-install-auto')-------- ## + +POETRY_COMMAND := poetry + +ifeq ($(DEFAULT_INSTALL_ENV),venv) +POETRY_COMMAND := source $(VENV_ACTIVATE) && poetry +else ifeq ($(DEFAULT_INSTALL_ENV),poetry) +POETRY_COMMAND := poetry +else ifeq ($(DEFAULT_INSTALL_ENV),conda) +POETRY_COMMAND := $(CONDA_TOOL) run -n $(CONDA_ENVIRONMENT) poetry +endif + +.PHONY: _check-env +_check-env: + @if ! [ $(DEFAULT_INSTALL_ENV) ]; then \ + echo -e "\e[1;39;41m-- WARNING --\e[0m No installation environment have been defined." ; \ + echo "" ; \ + echo "Defaulting to Poetry managed environment - Poetry will either use activated environment, or '.venv'," ; \ + echo "if found, or create and manage it's own environment if not." ; \ + elif [ $(DEFAULT_INSTALL_ENV) = "venv" ]; then \ + if [ ! -f $(VENV_ACTIVATE) ]; then \ + make -s venv-create ;\ + fi; \ + elif [ $(DEFAULT_INSTALL_ENV) = "conda" ]; then \ + if ! $(CONDA_TOOL) env list | grep -q $(CONDA_ENVIRONMENT) ; then \ + make -s conda-create-env ; \ + fi; \ + fi; + +.PHONY: _remind-env-activate +_remind-env-activate: + @echo "" + @echo "Activate your environment using the following command:" + @echo "" + @if ! 
[ $(DEFAULT_INSTALL_ENV) ] || [ $(DEFAULT_INSTALL_ENV) = "poetry" ]; then \ + make -s poetry-activate ; \ + echo "" ; \ + echo "You can also use the eval bash command : eval \$$(make poetry-activate)"; \ + echo "" ; \ + echo "The environment can also be used through the 'poetry run ' command."; \ + echo "" ; \ + echo " Ex: poetry run python "; \ + elif [ $(DEFAULT_INSTALL_ENV) = "venv" ]; then \ + make -s venv-activate ; \ + echo "" ; \ + echo "You can also use the eval bash command : eval \$$(make venv-activate)"; \ + elif [ $(DEFAULT_INSTALL_ENV) = "conda" ]; then \ + make -s conda-activate ; \ + echo "" ; \ + echo "You can also use the eval bash command : eval \$$(make conda-activate)"; \ + fi; + @echo "" + +test-echo: + @echo "use the eval bash command : eval \$$(make poetry-activate)" .PHONY: install install: install-precommit ## Install the application package, developer dependencies and pre-commit hook .PHONY: install-precommit -install-precommit: install-dev## Install the pre-commit hooks (also installs developer dependencies) +install-precommit: install-dev ## Install the pre-commit hooks (also installs developer dependencies) @if [ -f .git/hooks/pre-commit ]; then \ echo "Pre-commit hook found"; \ else \ echo "Pre-commit hook not found, proceeding to configure it"; \ - poetry run pre-commit install; \ + $(POETRY_COMMAND) run pre-commit install; \ fi; .PHONY: install-dev -install-dev: poetry-install-auto ## Install the application along with developer dependencies - @poetry install --with dev +install-dev: poetry-install-auto _check-env ## Install the application along with developer dependencies + @$(POETRY_COMMAND) install --with dev + @make -s _remind-env-activate .PHONY: install-with-lab -install-with-lab: poetry-install-auto ## Install the application and it's dev dependencies, including Jupyter Lab - @poetry install --with dev --with lab +install-with-lab: poetry-install-auto _check-env ## Install the application and its dev dependencies, 
including Jupyter Lab + @$(POETRY_COMMAND) install --with dev --with lab + @make -s _remind-env-activate .PHONY: install-package -install-package: poetry-install-auto ## Install the application package only - @poetry install +install-package: poetry-install-auto _check-env ## Install the application package only + @$(POETRY_COMMAND) install + @make -s _remind-env-activate ## -- Versioning targets -------------------------------------------------------------------------------------------- ## @@ -381,6 +597,10 @@ ifeq ($(filter dry, $(MAKECMDGOALS)), dry) BUMP_ARGS := $(BUMP_ARGS) --dry-run --allow-dirty endif +.PHONY: dry +dry: ## Add the dry target for a preview of changes; ex. 'make bump-major dry' + @-echo > /dev/null + .PHONY: bump-major bump-major: ## Bump application major version $(BUMP_TOOL) $(BUMP_ARGS) bump major @@ -404,9 +624,13 @@ check-lint: ## Check code linting (black, isort, flake8, docformatter and pylint poetry run nox -s check .PHONY: check-pylint -check-pylint: ## Check code linting with pylint +check-pylint: ## Check code with pylint poetry run nox -s pylint +.PHONY: check-complexity +check-complexity: ## Check code cyclomatic complexity with Flake8-McCabe + poetry run nox -s complexity + .PHONY: fix-lint fix-lint: ## Fix code linting (black, isort, flynt, docformatter) poetry run nox -s fix @@ -415,7 +639,6 @@ fix-lint: ## Fix code linting (black, isort, flynt, docformatter) precommit: ## Run Pre-commit on all files manually poetry run nox -s precommit - ## -- Tests targets ------------------------------------------------------------------------------------------------- ## .PHONY: test diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5d4254c..6874901 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: "^docs/|/migrations/" +exclude: ^docs/|/migrations/|Makefile* default_stages: [commit] repos: @@ -17,8 +17,18 @@ repos: - id: check-added-large-files args: ["--maxkb=5000"] + - repo: 
https://github.com/PyCQA/autoflake + rev: v2.3.1 + hooks: + - id: autoflake + + - repo: https://github.com/hhatto/autopep8 + rev: v2.3.2 + hooks: + - id: autopep8 + - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.4.2 hooks: - id: black diff --git a/Makefile.private.example b/Makefile.private.example index be83cd0..c65e8d5 100644 --- a/Makefile.private.example +++ b/Makefile.private.example @@ -24,5 +24,15 @@ DOCKER_COMPOSE := docker compose # a 'true' value will automatically install/remove without asking beforehand. AUTO_INSTALL := false +# The default environment to use. The choices are as follow: [venv, poetry, conda] +# If this is not set, the makefile will use the `poetry` command without activating +# an environment before hand. +# DEFAULT_INSTALL_ENV := conda + +# The default environment where Poetry will be installed. The choices are as follow: [venv, conda] +# If this is not set, the makefile will ask the user where they want to install Poetry +#DEFAULT_POETRY_INSTALL_ENV := venv + + ## -- Private targets ------------------------------------------------------------------------------------------------## diff --git a/climateset/download/__init__.py b/climateset/download/__init__.py index e69de29..b5bc4e2 100644 --- a/climateset/download/__init__.py +++ b/climateset/download/__init__.py @@ -0,0 +1 @@ +from .downloader import download_from_config_file # noqa F401 diff --git a/climateset/download/abstract_downloader.py b/climateset/download/abstract_downloader.py new file mode 100644 index 0000000..ebf69cd --- /dev/null +++ b/climateset/download/abstract_downloader.py @@ -0,0 +1,7 @@ +from abc import ABC, abstractmethod + + +class AbstractDownloader(ABC): + @abstractmethod + def download(self): + pass diff --git a/climateset/download/cmip6_downloader.py b/climateset/download/cmip6_downloader.py new file mode 100644 index 0000000..3cc2758 --- /dev/null +++ b/climateset/download/cmip6_downloader.py @@ -0,0 +1,86 @@ +from 
climateset.download.abstract_downloader import AbstractDownloader +from climateset.download.constants.esgf import CMIP6 +from climateset.download.downloader_config import ( + CMIP6DownloaderConfig, + create_cmip6_downloader_config_from_file, +) +from climateset.download.utils import search_and_download_esgf_model_single_var +from climateset.utils import create_logger + +LOGGER = create_logger(__name__) + + +class CMIP6Downloader(AbstractDownloader): + def __init__(self, config: CMIP6DownloaderConfig): + self.logger = LOGGER + self.config = config + + def download(self): + """ + Function handling the download of all variables that are associated with a model's output. + + Searches for all files associated with the respective variables and experiment that the downloader + was initialized with. + + A search connection is established and the search is iteratively constrained to meet all specifications. + Data is downloaded and stored in a separate file for each year. The default format is netCDF4. 
+ + Resulting hierarchy: + + `CMIPx/model_id/ensemble_member/experiment/variable/nominal_resolution/frequency/year.nc` + + If the constraints cannot be met, the downloader's default behaviour is to select the first other + available value + """ + for model in self.config.models: + self.logger.info(f"Downloading data for model: [{model}]") + for variable in self.config.variables: + self.logger.info(f"Downloading data for variable: [{variable}]") + for experiment in self.config.experiments: + self.logger.info(f"Downloading data for experiment: [{experiment}]") + self.download_from_model_single_var( + model=model, project=self.config.project, variable=variable, experiment=experiment + ) + + def download_from_model_single_var( + self, + model: str, + variable: str, + experiment: str, + project: str = CMIP6, + default_frequency: str = "mon", + preferred_version: str = "latest", + default_grid_label: str = "gn", + ): + """ + Function handling the download of a single variable-experiment pair that is associated with a model's output + (CMIP data). + + Args: + model (str): The model ID + variable: variable ID + experiment: experiment ID + project: umbrella project id e.g. 
CMIPx + default_frequency: default frequency to download + preferred_version: data upload version, if 'latest', the newest version will get selected always + default_grid_label: default gridding method in which the data is provided + """ + results_list = search_and_download_esgf_model_single_var( + model=model, + variable=variable, + experiment=experiment, + project=project, + default_frequency=default_frequency, + default_grid_label=default_grid_label, + preferred_version=preferred_version, + ensemble_members=self.config.ensemble_members, + max_ensemble_members=self.config.max_ensemble_members, + base_path=self.config.data_dir, + ) + self.logger.info(f"Download results: {results_list}") + + +def cmip6_download_from_config(config): + config_object = create_cmip6_downloader_config_from_file(config) + downloader = CMIP6Downloader(config=config_object) + downloader.download() diff --git a/climateset/download/constants/__init__.py b/climateset/download/constants/__init__.py index e69de29..c91abec 100644 --- a/climateset/download/constants/__init__.py +++ b/climateset/download/constants/__init__.py @@ -0,0 +1,10 @@ +NODE_LINK_URLS = [ + "https://esgf-node.llnl.gov/esg-search", + "https://esgf.ceda.ac.uk/esg-search", + "https://esgf-data.dkrz.de/esg-search", + "https://esgf-node.ipsl.upmc.fr/esg-search", + "https://esg-dn1.nsc.liu.se/esg-search", + "https://esgf.nci.org.au/esg-search", + "https://esgf.nccs.nasa.gov/esg-search", + "https://esgdata.gfdl.noaa.gov/esg-search", +] diff --git a/climateset/download/constants/cmip6.py b/climateset/download/constants/cmip6.py new file mode 100644 index 0000000..656a29f --- /dev/null +++ b/climateset/download/constants/cmip6.py @@ -0,0 +1,34 @@ +# TODO remove raw variables from here +# pylint: disable=C0103 +from dataclasses import dataclass +from typing import Final + +from climateset.utils import get_yaml_config + + +@dataclass(frozen=True) +class Cmip6Constants: + """ + Dataclass to represent CMIP6 constants that are used by 
the download module. + + Attributes: + NODE_LINK : Where the data can be accessed + MODEL_SOURCES : Identifiers for supported climate models + VAR_SOURCE_LOOKUP : model and raw variables + SUPPORTED_EXPERIMENTS : experiments of climate models (runs) that are supported + """ + + NODE_LINK: Final[str] + MODEL_SOURCES: Final[tuple[str, ...]] + VAR_SOURCE_LOOKUP: Final[tuple[str, ...]] + SUPPORTED_EXPERIMENTS: Final[tuple[str, ...]] + + +_data = get_yaml_config("downloader/constants/cmip6.yaml") + +CMIP6_CONSTANTS = Cmip6Constants( + NODE_LINK=_data["node_link"], + MODEL_SOURCES=tuple(_data["model_sources"]), + SUPPORTED_EXPERIMENTS=tuple(_data["supported_experiments"]), + VAR_SOURCE_LOOKUP=tuple(_data["var_source_lookup"]), +) diff --git a/climateset/download/constants/cmip6plus.py b/climateset/download/constants/cmip6plus.py new file mode 100644 index 0000000..d725342 --- /dev/null +++ b/climateset/download/constants/cmip6plus.py @@ -0,0 +1,35 @@ +# pylint: disable=C0103 +from dataclasses import dataclass +from typing import Final + +from climateset.utils import get_yaml_config + +# TODO remove raw variables from here + + +@dataclass(frozen=True) +class Cmip6plusConstants: + """ + Dataclass to represent CMIP6PLUS constants that are used by the download module. 
+ + Attributes: + NODE_LINK : Where the data can be accessed + MODEL_SOURCES : Identifiers for supported climate models + VAR_SOURCE_LOOKUP : model and raw variables + SUPPORTED_EXPERIMENTS : experiments of climate models (runs) that are supported + """ + + NODE_LINK: Final[str] + MODEL_SOURCES: Final[tuple[str, ...]] + VAR_SOURCE_LOOKUP: Final[tuple[str, ...]] + SUPPORTED_EXPERIMENTS: Final[tuple[str, ...]] + + +_data = get_yaml_config("downloader/constants/cmip6plus.yaml") + +CMIP6PLUS_CONSTANTS = Cmip6plusConstants( + NODE_LINK=_data["node_link"], + MODEL_SOURCES=tuple(_data["model_sources"]), + SUPPORTED_EXPERIMENTS=tuple(_data["supported_experiments"]), + VAR_SOURCE_LOOKUP=tuple(_data["var_source_lookup"]), +) diff --git a/climateset/download/constants/data_constants.py b/climateset/download/constants/data_constants.py deleted file mode 100644 index ff89eb1..0000000 --- a/climateset/download/constants/data_constants.py +++ /dev/null @@ -1,18 +0,0 @@ -EMISSIONS_ENDINGS = ["_em_openburning", "_em_anthro", "_em_AIR_anthro"] - -META_ENDINGS_PRC = [ - "_percentage_AGRI", - "_percentage_BORF", - "_percentage_DEFO", - "_percentage_PEAT", - "_percentage_SAVA", - "_percentage_TEMF", -] -META_ENDINGS_SHAR = ["_openburning_share"] - -LON_LAT_TO_GRID_SIZE = { - (720, 360): "25_km", - (360, 720): "25_km", - (96, 144): "250_km", - (144, 96): "250_km", -} diff --git a/climateset/download/constants/esgf.py b/climateset/download/constants/esgf.py new file mode 100644 index 0000000..00ebc92 --- /dev/null +++ b/climateset/download/constants/esgf.py @@ -0,0 +1,23 @@ +from .cmip6 import CMIP6_CONSTANTS +from .cmip6plus import CMIP6PLUS_CONSTANTS +from .input4mips import INPUT4MIPS_CONSTANTS + +CMIP6 = "CMIP6" +CMIP6PLUS = "CMIP6Plus" +INPUT4MIPS = "input4MIPs" + +ESGF_PROJECTS = frozenset([CMIP6, CMIP6PLUS, INPUT4MIPS]) + +# constant classes for esgf projects implemented here +# add your own esgf project for downloading to download/constants/ and add the constant class to the dict 
and lists here +ESGF_PROJECTS_CONSTANTS = { + CMIP6: CMIP6_CONSTANTS, + CMIP6PLUS: CMIP6PLUS_CONSTANTS, + INPUT4MIPS: INPUT4MIPS_CONSTANTS, +} + +# datasets that provide inputs to climate models +ESGF_RAW_INPUT_LIST = [INPUT4MIPS] + +# datasets that provide outputs from climate models +ESGF_MODEL_OUTPUT_LIST = [CMIP6, CMIP6PLUS] diff --git a/climateset/download/constants/esgf_server.py b/climateset/download/constants/esgf_server.py deleted file mode 100644 index cf5bd17..0000000 --- a/climateset/download/constants/esgf_server.py +++ /dev/null @@ -1,1359 +0,0 @@ -# Supported Model sources -NODE_LINK = "http://esgf-node.llnl.gov/esg-search/" -MODEL_SOURCES = { - "ACCESS-CM2": { - "node_link": NODE_LINK, - "center": "CSIRO-ARCCSS", - }, - "ACCESS-ESM1-5": { - "node_link": NODE_LINK, - "center": "CSIRO", - }, - "AWI-CM-1-1-MR": { - "node_link": NODE_LINK, - "center": "AWI", - }, - "BCC-CSM2-MR": { - "node_link": NODE_LINK, - "center": "BCC", - }, - "CAMS-CSM1-0": { - "node_link": NODE_LINK, - "center": "CAMS", - }, - "CAS-ESM2-0": { - "node_link": NODE_LINK, - "center": "CAS", - }, - "CESM2": {"node_link": NODE_LINK, "center": "NCAR"}, - "CESM2-WACCM": { - "node_link": NODE_LINK, - "center": "NCAR", - }, - "CMCC-CM2-SR5": { - "node_link": NODE_LINK, - "center": "NCAR", - }, - "CMCC-ESM2": { - "node_link": NODE_LINK, - "center": "CMCC", - }, - "CNRM-CM6-1": { - "node_link": NODE_LINK, - "center": "CNRM-CERFACS", - }, - "CNRM-CM6-1-HR": { - "node_link": NODE_LINK, - "center": "CNRM-CERFACS", - }, - "CNRM-ESM2-1": { - "node_link": NODE_LINK, - "center": "CNRM-CERFACS", - }, - "EC-Earth3": { - "node_link": NODE_LINK, - "center": "EC-Earth-Consortium", - }, - "EC-Earth3-Veg": { - "node_link": NODE_LINK, - "center": "EC-Earth-Consortium", - }, - "EC-Earth3-Veg-LR": { - "node_link": NODE_LINK, - "center": "EC-Earth-Consortium", - }, - "FGOALS-f3-L": { - "node_link": NODE_LINK, - "center": "CAS", - }, - "FGOALS-g3": { - "node_link": NODE_LINK, - "center": "CAS", - }, - 
"GFDL-ESM4": { - "node_link": NODE_LINK, - "center": "NOAA-GFDL", - }, - "GISS-E2-1-G": { - "node_link": NODE_LINK, - "center": "NASA-GISS", - }, - "GISS-E2-1-H": { - "node_link": NODE_LINK, - "center": "NASA-GISS", - }, - "GISS-E2-2-G": { - "node_link": NODE_LINK, - "center": "NASA-GISS", - }, - "IITM-ESM": { - "node_link": NODE_LINK, - "center": "CCCR-IITM", - }, - "INM-CM4-8": { - "node_link": NODE_LINK, - "center": "INM", - }, - "INM-CM5-0": { - "node_link": NODE_LINK, - "center": "INM", - }, - "IPSL-CM6A-LR": { - "node_link": NODE_LINK, - "center": "IPSL", - }, - "KACE-1-0-G": { - "node_link": NODE_LINK, - "center": "NIMS-KMA ", - }, - "MCM-UA-1-0": { - "node_link": NODE_LINK, - "center": "UA", - }, - "MIROC6": {"node_link": NODE_LINK, "center": "MIROC"}, - # there are several centers for the MPI models - consider choosing another one if needed - "MPI-ESM1-2-HR": { - "node_link": NODE_LINK, - "center": "MPI-M", - }, - "MPI-ESM1-2-LR": { - "node_link": NODE_LINK, - "center": "MPI-M", - }, - "MRI-ESM2-0": { - "node_link": NODE_LINK, - "center": "MRI", - }, - "NorESM2-LM": { - "node_link": "https://esgf-data.dkrz.de/esg-search", - "center": "NCC", - }, - "NorESM2-MM": { - "node_link": "https://esgf-data.dkrz.de/esg-search", - "center": "NCC", - }, - "TaiESM1": { - "node_link": NODE_LINK, - "center": "AS-RCEC", - }, - # there are several centers for the UKESM models - consider choosing another one if needed - "UKESM1-0-LL": { - "node_link": NODE_LINK, - "center": "MOHC", - }, - # "NorESM2-LM": {"node_link": "https://esgf-data.dkrz.de/esg-search", "center": "NCC"}, - # "CanESM5" : {"node_link": NODE_LINK, "center": "CCCma"} -} - -VAR_SOURCE_LOOKUP = { - "model": [ - "ztp", - "zsatcalc", - "zsatarag", - "zostoga", - "zossq", - "zos", - "zoocos", - "zooc", - "zo2min", - "zhalfo", - "zg500", - "zg1000", - "zg100", - "zg10", - "zg", - "zfullo", - "wtd", - "wo", - "wmo", - "wfonocorr", - "wfo", - "wetss", - "wetso4", - "wetso2", - "wetlandFrac", - "wetlandCH4", - 
"wetbc", - "wap500", - "wap", - "vsf", - "volo", - "volcello", - "vo", - "vmo", - "vegHeight", - "va", - "uo", - "umo", - "ua", - "tslsi", - "tsl", - "ts", - "tran", - "tossq", - "tosga", - "tos", - "tob", - "thkcello", - "thetaot700", - "thetaot300", - "thetaot2000", - "thetaot", - "thetaoga", - "thetao", - "tgs", - "tcs", - "tauvo", - "tauv", - "tauuo", - "tauu", - "tasmin", - "tasmax", - "tas", - "talkos", - "talknat", - "talk", - "ta850", - "ta700", - "ta500", - "ta", - "t20d", - "spco2", - "sossq", - "sosga", - "sos", - "sootsn", - "somint", - "soga", - "sob", - "so2", - "so", - "snw", - "sndmasswindrif", - "sndmasssnf", - "sndmasssi", - "sndmassmelt", - "snd", - "snc", - "sivols", - "sivoln", - "sivol", - "siv", - "siu", - "sitimefrac", - "sithick", - "sitemptop", - "sitempsnic", - "sitempbot", - "sistryubot", - "sistrydtop", - "sistrxubot", - "sistrxdtop", - "sispeed", - "sisnthick", - "sisnmass", - "sisnhc", - "sisnconc", - "sirdgthick", - "sirdgconc", - "sipr", - "sios", - "simpconc", - "simass", - "siitdthick", - "siitdsnthick", - "siitdsnconc", - "siitdconc", - "sihc", - "siforcetilty", - "siforcetiltx", - "siforceintstry", - "siforceintstrx", - "siforcecorioly", - "siforcecoriolx", - "siflswutop", - "siflswdtop", - "siflswdbot", - "siflsensupbot", - "siflsenstop", - "sifllwutop", - "sifllwdtop", - "sifllatstop", - "siflfwdrain", - "siflfwbot", - "siflcondtop", - "siflcondbot", - "sifb", - "siextents", - "siextentn", - "sidmasstrany", - "sidmasstranx", - "sidmassth", - "sidmasssi", - "sidmassmelttop", - "sidmassmeltbot", - "sidmasslat", - "sidmassgrowthwat", - "sidmassgrowthbot", - "sidmassevapsubl", - "sidmassdyn", - "sidivvel", - "sidconcth", - "sidconcdyn", - "siconc", - "sicompstren", - "siarean", - "siage", - "si", - "sftof", - "sftlf", - "sftgif", - "sfdsi", - "sfcWind", - "sf6", - "rtmt", - "rsutcsaf", - "rsutcs", - "rsutaf", - "rsut", - "rsuscs", - "rsus", - "rsntds", - "rsdt", - "rsdsdiff", - "rsdscs", - "rsds", - "rlutcsaf", - "rlutcs", - 
"rlutaf", - "rlut", - "rlus", - "rldscs", - "rlds", - "rh", - "reffclwtop", - "ra", - "rMaint", - "rGrowth", - "qgwr", - "pso", - "psl", - "ps", - "prw", - "prveg", - "prsn", - "prra", - "prc", - "pr", - "ppos", - "pp", - "popos", - "pop", - "ponos", - "pon", - "po4os", - "po4", - "phynos", - "phyn", - "phyfeos", - "phyfe", - "phyc", - "phos", - "phnat", - "phalf", - "ph", - "pfull", - "pctisccp", - "pbo", - "orog", - "opottempmint", - "oh", - "od870aer", - "od550ss", - "od550so4", - "od550oa", - "od550lt1aer", - "od550dust", - "od550csaer", - "od550bc", - "od550aerh2o", - "od550aer", - "od440aer", - "obvfsq", - "o3", - "o2satos", - "o2sat", - "o2os", - "o2min", - "o2", - "nppWood", - "nppRoot", - "nppLeaf", - "npp", - "no3os", - "no3", - "nep", - "nbp", - "nVeg", - "nStem", - "nSoil", - "nRoot", - "nMineralNO3", - "nMineralNH4", - "nMineral", - "nLitter", - "nLeaf", - "nLand", - "n2oglobal", - "msftmzmpa", - "msftmz", - "msftmrhompa", - "msftmrho", - "msftbarot", - "mrtws", - "mrsos", - "mrsol", - "mrso", - "mrsll", - "mrsfl", - "mrros", - "mrrob", - "mrro", - "mrlso", - "mrfso", - "mmrss", - "mmrsoa", - "mmrso4", - "mmrpm2p5", - "mmrpm1", - "mmroa", - "mmrdust", - "mmrbc", - "mmraerh2o", - "mlotstsq", - "mlotstmin", - "mlotstmax", - "mlotst", - "mfo", - "masso", - "masscello", - "lwsnl", - "lwp", - "loadss", - "loaddust", - "lai", - "isop", - "intpp", - "intpoc", - "intpn2", - "intdoc", - "intdic", - "huss", - "hus", - "hurs", - "hur", - "hfy", - "hfx", - "hfss", - "hfls", - "hfds", - "hfbasinpmdiff", - "hfbasinpmadv", - "hfbasinpadv", - "hfbasin", - "gpp", - "fsitherm", - "froc", - "frn", - "friver", - "fric", - "frfe", - "ficeberg", - "fgo2", - "fgdms", - "fgco2nat", - "fgco2", - "fVegLitterSenescence", - "fVegLitterMortality", - "fVegLitter", - "fNup", - "fNnetmin", - "fNloss", - "fNleach", - "fNgasNonFire", - "fNgasFire", - "fNgas", - "fNfert", - "fNdep", - "fNProduct", - "fNOx", - "fN2O", - "fLuc", - "fLitterFire", - "fHarvestToProduct", - "fHarvest", - 
"fFireNat", - "fFire", - "fDeforestToProduct", - "fBNF", - "evspsblveg", - "evspsblsoi", - "evspsbl", - "evs", - "esn", - "es", - "epsi100", - "epp100", - "epn100", - "epfe100", - "epcalc100", - "epc100", - "emivoc", - "emiss", - "emiso4", - "emiso2", - "emioa", - "emiisop", - "emidust", - "emidms", - "emibvoc", - "emibc", - "ec", - "dryso4", - "dryso2", - "drybc", - "dpco2", - "dmsos", - "dms", - "dmlt", - "dissocos", - "dissoc", - "dissicos", - "dissicnat", - "dissic", - "dfeos", - "dfe", - "detocos", - "detoc", - "deptho", - "cod", - "co3satcalcos", - "co3satcalc", - "co3sataragos", - "co3satarag", - "co3os", - "co3nat", - "co3", - "co2mass", - "co2", - "clwvi", - "clwmodis", - "clw", - "cltmodis", - "cltisccp", - "cltcalipso", - "clt", - "clmcalipso", - "cllcalipso", - "clivi", - "climodis", - "cli", - "clhcalipso", - "cl", - "chlos", - "chl", - "chepsoa", - "ch4global", - "cfc12global", - "cfc12", - "cfc11global", - "cfc11", - "cdnc", - "cct", - "ccn", - "ccb", - "calcos", - "calc", - "cWood", - "cVeg", - "cStem", - "cSoilSlow", - "cSoilMedium", - "cSoilFast", - "cSoilAbove1m", - "cSoil", - "cRoot", - "cMisc", - "cLitter", - "cLeaf", - "cLand", - "cCwd", - "bsios", - "bsi", - "bldep", - "bfeos", - "bfe", - "basin", - "ares", - "areacello", - "areacella", - "albisccp", - "airmass", - "agessc", - "abs550aer", - ], - "raw": [ - "years", - "year_weight", - "year_fr", - "wlenbinsize", - "wlen_bnds", - "wlen", - "wfo", - "wetnoy", - "wetnhx", - "water_vapor", - "vos", - "volume_density", - "vo", - "vmro3", - "vas", - "urban_to_secdn", - "urban_to_secdf", - "urban_to_range", - "urban_to_pastr", - "urban_to_c4per", - "urban_to_c4ann", - "urban_to_c3per", - "urban_to_c3nfx", - "urban_to_c3ann", - "urban", - "uos", - "uo", - "uas", - "tsi", - "ts", - "total_solar_irradiance", - "tosbcs", - "tos", - "thetao", - "theta", - "temp_level", - "temp_layer", - "tauv", - "tauu", - "tas", - "surface_temperature", - "surface_emissivity", - "surface_albedo", - "sst", - "ssn", - 
"ssi", - "ssa550", - "sos", - "solar_zenith_angle", - "so2f2_SH", - "so2f2_NH", - "so2f2_GM", - "so", - "sithick", - "sig_lon_W", - "sig_lon_E", - "sig_lat_W", - "sig_lat_E", - "siconcbcs", - "siconca", - "siconc", - "sftof", - "sftflf", - "sf6_SH", - "sf6_NH", - "sf6_GM", - "secyf_harv", - "secyf_bioh", - "secnf_harv", - "secnf_bioh", - "secmf_harv", - "secmf_bioh", - "secmb", - "secma", - "secdn_to_urban", - "secdn_to_secdf", - "secdn_to_range", - "secdn_to_pastr", - "secdn_to_c4per", - "secdn_to_c4ann", - "secdn_to_c3per", - "secdn_to_c3nfx", - "secdn_to_c3ann", - "secdn", - "secdf_to_urban", - "secdf_to_secdn", - "secdf_to_range", - "secdf_to_pastr", - "secdf_to_c4per", - "secdf_to_c4ann", - "secdf_to_c3per", - "secdf_to_c3nfx", - "secdf_to_c3ann", - "secdf", - "scph", - "scnum", - "sad_of_big_particles", - "sad", - "rsds", - "rndwd", - "rmean", - "rlds", - "range_to_urban", - "range_to_secdn", - "range_to_secdf", - "range_to_pastr", - "range_to_c4per", - "range_to_c4ann", - "range_to_c3per", - "range_to_c3nfx", - "range_to_c3ann", - "range", - "ptbio", - "psl", - "prsn", - "prra", - "profile_weight", - "primn_to_urban", - "primn_to_secdf", - "primn_to_range", - "primn_to_pastr", - "primn_to_c4per", - "primn_to_c4ann", - "primn_to_c3per", - "primn_to_c3nfx", - "primn_to_c3ann", - "primn_harv", - "primn_bioh", - "primn", - "primf_to_urban", - "primf_to_secdn", - "primf_to_range", - "primf_to_pastr", - "primf_to_c4per", - "primf_to_c4ann", - "primf_to_c3per", - "primf_to_c3nfx", - "primf_to_c3ann", - "primf_harv", - "primf_bioh", - "primf", - "pressure", - "pres_level", - "pres_layer", - "pr", - "plume_number", - "plume_lon", - "plume_lat", - "plume_feature", - "percentage_TEMF", - "percentage_SAVA", - "percentage_PEAT", - "percentage_DEFO", - "percentage_BORF", - "percentage_AGRI", - "pastr_to_urban", - "pastr_to_secdn", - "pastr_to_secdf", - "pastr_to_range", - "pastr_to_c4per", - "pastr_to_c4ann", - "pastr_to_c3per", - "pastr_to_c3nfx", - "pastr_to_c3ann", - 
"pastr", - "ozone", - "oxygen_GM", - "nitrous_oxide_SH", - "nitrous_oxide_NH", - "nitrous_oxide_GM", - "nitrogen_GM", - "nf3_SH", - "nf3_NH", - "nf3_GM", - "mrro", - "month", - "mole_fraction_of_so2f2_in_air", - "mole_fraction_of_sf6_in_air", - "mole_fraction_of_nitrous_oxide_in_air", - "mole_fraction_of_nf3_in_air", - "mole_fraction_of_methyl_chloride_in_air", - "mole_fraction_of_methyl_bromide_in_air", - "mole_fraction_of_methane_in_air", - "mole_fraction_of_hfc4310mee_in_air", - "mole_fraction_of_hfc365mfc_in_air", - "mole_fraction_of_hfc32_in_air", - "mole_fraction_of_hfc245fa_in_air", - "mole_fraction_of_hfc23_in_air", - "mole_fraction_of_hfc236fa_in_air", - "mole_fraction_of_hfc227ea_in_air", - "mole_fraction_of_hfc152a_in_air", - "mole_fraction_of_hfc143a_in_air", - "mole_fraction_of_hfc134aeq_in_air", - "mole_fraction_of_hfc134a_in_air", - "mole_fraction_of_hfc125_in_air", - "mole_fraction_of_hcfc22_in_air", - "mole_fraction_of_hcfc142b_in_air", - "mole_fraction_of_hcfc141b_in_air", - "mole_fraction_of_halon2402_in_air", - "mole_fraction_of_halon1301_in_air", - "mole_fraction_of_halon1211_in_air", - "mole_fraction_of_co2eq_in_air", - "mole_fraction_of_chcl3_in_air", - "mole_fraction_of_ch3ccl3_in_air", - "mole_fraction_of_ch2cl2_in_air", - "mole_fraction_of_cfc12eq_in_air", - "mole_fraction_of_cfc12_in_air", - "mole_fraction_of_cfc11eq_in_air", - "mole_fraction_of_cfc11_in_air", - "mole_fraction_of_cfc115_in_air", - "mole_fraction_of_cfc114_in_air", - "mole_fraction_of_cfc113_in_air", - "mole_fraction_of_cf4_in_air", - "mole_fraction_of_carbon_tetrachloride_in_air", - "mole_fraction_of_carbon_dioxide_in_air", - "mole_fraction_of_c_c4f8_in_air", - "mole_fraction_of_c8f18_in_air", - "mole_fraction_of_c7f16_in_air", - "mole_fraction_of_c6f14_in_air", - "mole_fraction_of_c5f12_in_air", - "mole_fraction_of_c4f10_in_air", - "mole_fraction_of_c3f8_in_air", - "mole_fraction_of_c2f6_in_air", - "methyl_chloride_SH", - "methyl_chloride_NH", - "methyl_chloride_GM", - 
"methyl_bromide_SH", - "methyl_bromide_NH", - "methyl_bromide_GM", - "methane_SH", - "methane_NH", - "methane_GM", - "mask4resto_ipv_Nextrop", - "mask4resto_ipv", - "mask4resto_amv_trop", - "mask4resto_amv_extrop", - "mask4resto_amv", - "lon_bounds", - "licalvf", - "lat_bounds", - "kp", - "is_biomass", - "irrig_c4per", - "irrig_c4ann", - "irrig_c3per", - "irrig_c3nfx", - "irrig_c3ann", - "ipv_index", - "iprp", - "iprm", - "iprg", - "icwtr", - "huss", - "hfds", - "hfc4310mee_SH", - "hfc4310mee_NH", - "hfc4310mee_GM", - "hfc365mfc_SH", - "hfc365mfc_NH", - "hfc365mfc_GM", - "hfc32_SH", - "hfc32_NH", - "hfc32_GM", - "hfc245fa_SH", - "hfc245fa_NH", - "hfc245fa_GM", - "hfc23_SH", - "hfc23_NH", - "hfc23_GM", - "hfc236fa_SH", - "hfc236fa_NH", - "hfc236fa_GM", - "hfc227ea_SH", - "hfc227ea_NH", - "hfc227ea_GM", - "hfc152a_SH", - "hfc152a_NH", - "hfc152a_GM", - "hfc143a_SH", - "hfc143a_NH", - "hfc143a_GM", - "hfc134aeq_SH", - "hfc134aeq_NH", - "hfc134aeq_GM", - "hfc134a_SH", - "hfc134a_NH", - "hfc134a_GM", - "hfc125_SH", - "hfc125_NH", - "hfc125_GM", - "hcfc22_SH", - "hcfc22_NH", - "hcfc22_GM", - "hcfc142b_SH", - "hcfc142b_NH", - "hcfc142b_GM", - "hcfc141b_SH", - "hcfc141b_NH", - "hcfc141b_GM", - "halon2402_SH", - "halon2402_NH", - "halon2402_GM", - "halon1301_SH", - "halon1301_NH", - "halon1301_GM", - "halon1211_SH", - "halon1211_NH", - "halon1211_GM", - "gzdis", - "gridcellarea", - "gpbio", - "gldis", - "glat_bnds", - "glat", - "fulwd", - "ftr_weight", - "fstnf", - "friver", - "flood", - "fill_flag", - "fharv_c4per", - "fharv_c3per", - "fertl_c4per", - "fertl_c4ann", - "fertl_c3per", - "fertl_c3nfx", - "fertl_c3ann", - "f107", - "expt_label", - "evspsbl", - "drynoy", - "drynhx", - "delta13co2_in_air", - "datasource", - "crpbf_total", - "crpbf_c4per", - "crpbf_c4ann", - "crpbf_c3per", - "crpbf_c3nfx", - "crpbf_c3ann", - "combf", - "co2eq_SH", - "co2eq_NH", - "co2eq_GM", - "chcl3_SH", - "chcl3_NH", - "chcl3_GM", - "ch3ccl3_SH", - "ch3ccl3_NH", - "ch3ccl3_GM", - "ch2cl2_SH", - 
"ch2cl2_NH", - "ch2cl2_GM", - "cfc12eq_SH", - "cfc12eq_NH", - "cfc12eq_GM", - "cfc12_SH", - "cfc12_NH", - "cfc12_GM", - "cfc11eq_SH", - "cfc11eq_NH", - "cfc11eq_GM", - "cfc11_SH", - "cfc11_NH", - "cfc11_GM", - "cfc115_SH", - "cfc115_NH", - "cfc115_GM", - "cfc114_SH", - "cfc114_NH", - "cfc114_GM", - "cfc113_SH", - "cfc113_NH", - "cfc113_GM", - "cf4_SH", - "cf4_NH", - "cf4_GM", - "ccode", - "carea", - "carbon_tetrachloride_SH", - "carbon_tetrachloride_NH", - "carbon_tetrachloride_GM", - "carbon_monoxide_GM", - "carbon_dioxide_SH", - "carbon_dioxide_NH", - "carbon_dioxide_GM", - "calyear", - "calmonth", - "calday", - "c_c4f8_SH", - "c_c4f8_NH", - "c_c4f8_GM", - "c8f18_SH", - "c8f18_NH", - "c8f18_GM", - "c7f16_SH", - "c7f16_NH", - "c7f16_GM", - "c6f14_SH", - "c6f14_NH", - "c6f14_GM", - "c5f12_SH", - "c5f12_NH", - "c5f12_GM", - "c4per_to_urban", - "c4per_to_secdn", - "c4per_to_secdf", - "c4per_to_range", - "c4per_to_pastr", - "c4per_to_c4ann", - "c4per_to_c3per", - "c4per_to_c3nfx", - "c4per_to_c3ann", - "c4per", - "c4f10_SH", - "c4f10_NH", - "c4f10_GM", - "c4ann_to_urban", - "c4ann_to_secdn", - "c4ann_to_secdf", - "c4ann_to_range", - "c4ann_to_pastr", - "c4ann_to_c4per", - "c4ann_to_c3per", - "c4ann_to_c3nfx", - "c4ann_to_c3ann", - "c4ann", - "c3per_to_urban", - "c3per_to_secdn", - "c3per_to_secdf", - "c3per_to_range", - "c3per_to_pastr", - "c3per_to_c4per", - "c3per_to_c4ann", - "c3per_to_c3nfx", - "c3per_to_c3ann", - "c3per", - "c3nfx_to_urban", - "c3nfx_to_secdn", - "c3nfx_to_secdf", - "c3nfx_to_range", - "c3nfx_to_pastr", - "c3nfx_to_c4per", - "c3nfx_to_c4ann", - "c3nfx_to_c3per", - "c3nfx_to_c3ann", - "c3nfx", - "c3f8_SH", - "c3f8_NH", - "c3f8_GM", - "c3ann_to_urban", - "c3ann_to_secdn", - "c3ann_to_secdf", - "c3ann_to_range", - "c3ann_to_pastr", - "c3ann_to_c4per", - "c3ann_to_c4ann", - "c3ann_to_c3per", - "c3ann_to_c3nfx", - "c3ann", - "c2f6_SH", - "c2f6_NH", - "c2f6_GM", - "bounds_time", - "bounds_sector", - "bounds_latitude", - "bounds_altitude", - "beta_b", - 
"beta_a", - "asy550", - "asl", - "areacello", - "areacellg", - "areacella", - "ap", - "aod_spmx", - "aod_fmbg", - "ann_cycle", - "angstrom", - "amv_index", - "altitude", - "added_tree_cover", - "acabf", - "WST", - "VOC_openburning_share", - "VOC_em_openburning", - "VOC_em_anthro", - "VOC_em_AIR_anthro", - "VOC25_other_voc_em_speciated_VOC_anthro", - "VOC25_other_voc_em_speciated_VOC", - "VOC25-other_voc_em_speciated_VOC", - "VOC24_acids_em_speciated_VOC_anthro", - "VOC24_acids_em_speciated_VOC", - "VOC24-acids_em_speciated_VOC", - "VOC23_ketones_em_speciated_VOC_anthro", - "VOC23_ketones_em_speciated_VOC", - "VOC23-ketones_em_speciated_VOC", - "VOC22_other_alka_em_speciated_VOC_anthro", - "VOC22_other_alka_em_speciated_VOC", - "VOC22-other_alka_em_speciated_VOC", - "VOC21_methanal_em_speciated_VOC_anthro", - "VOC21_methanal_em_speciated_VOC", - "VOC21-methanal_em_speciated_VOC", - "VOC20_chlorinate_em_speciated_VOC_anthro", - "VOC20_chlorinate_em_speciated_VOC", - "VOC20-chlorinate_em_speciated_VOC", - "VOC19_ethers_em_speciated_VOC_anthro", - "VOC19_ethers_em_speciated_VOC", - "VOC19-ethers_em_speciated_VOC", - "VOC18_esters_em_speciated_VOC_anthro", - "VOC18_esters_em_speciated_VOC", - "VOC18-esters_em_speciated_VOC", - "VOC17_other_arom_em_speciated_VOC_anthro", - "VOC17_other_arom_em_speciated_VOC", - "VOC17-other_arom_em_speciated_VOC", - "VOC16_trimethylb_em_speciated_VOC_anthro", - "VOC16_trimethylb_em_speciated_VOC", - "VOC16-trimethylb_em_speciated_VOC", - "VOC15_xylene_em_speciated_VOC_anthro", - "VOC15_xylene_em_speciated_VOC", - "VOC15-xylene_em_speciated_VOC", - "VOC14_toluene_em_speciated_VOC_anthro", - "VOC14_toluene_em_speciated_VOC", - "VOC14-toluene_em_speciated_VOC", - "VOC13_benzene_em_speciated_VOC_anthro", - "VOC13_benzene_em_speciated_VOC", - "VOC13-benzene_em_speciated_VOC", - "VOC12_other_alke_em_speciated_VOC_anthro", - "VOC12_other_alke_em_speciated_VOC", - "VOC12-other_alke_em_speciated_VOC", - "VOC09_ethyne_em_speciated_VOC_anthro", - 
"VOC09_ethyne_em_speciated_VOC", - "VOC09-ethyne_em_speciated_VOC", - "VOC08_propene_em_speciated_VOC_anthro", - "VOC08_propene_em_speciated_VOC", - "VOC08-propene_em_speciated_VOC", - "VOC07_ethene_em_speciated_VOC_anthro", - "VOC07_ethene_em_speciated_VOC", - "VOC07-ethene_em_speciated_VOC", - "VOC06_hexanes_pl_em_speciated_VOC_anthro", - "VOC06_hexanes_pl_em_speciated_VOC", - "VOC06-hexanes_pl_em_speciated_VOC", - "VOC05_pentanes_em_speciated_VOC_anthro", - "VOC05_pentanes_em_speciated_VOC", - "VOC05-pentanes_em_speciated_VOC", - "VOC04_butanes_em_speciated_VOC_anthro", - "VOC04_butanes_em_speciated_VOC", - "VOC04-butanes_em_speciated_VOC", - "VOC03_propane_em_speciated_VOC_anthro", - "VOC03_propane_em_speciated_VOC", - "VOC03-propane_em_speciated_VOC", - "VOC02_ethane_em_speciated_VOC_anthro", - "VOC02_ethane_em_speciated_VOC", - "VOC02-ethane_em_speciated_VOC", - "VOC01_alcohols_em_speciated_VOC_anthro", - "VOC01_alcohols_em_speciated_VOC", - "VOC01-alcohols_em_speciated_VOC", - "Toluene_lump", - "TRA", - "SO2_openburning_share", - "SO2_em_openburning", - "SO2_em_anthro", - "SO2_em_SOLID_BIOFUEL_anthro", - "SO2_em_AIR_anthro", - "SO2", - "SLV", - "SHP", - "RSLossRem", - "RCO", - "OC_openburning_share", - "OC_em_openburning", - "OC_em_anthro", - "OC_em_SOLID_BIOFUEL_anthro", - "OC_em_AIR_anthro", - "OC", - "NOx_openburning_share", - "NOx_em_openburning", - "NOx_em_anthro", - "NOx_em_SOLID_BIOFUEL_anthro", - "NOx_em_AIR_anthro", - "NOx", - "NMVOC_openburning_share", - "NMVOC_em_openburning", - "NMVOC_em_anthro", - "NMVOC_em_SOLID_BIOFUEL_anthro", - "NMVOC_em_AIR_anthro", - "NMVOC_Toluene_lump_speciated_VOC_openburning_share", - "NMVOC_Toluene_lump_em_speciated_VOC_openburning", - "NMVOC_MEK_speciated_VOC_openburning_share", - "NMVOC_MEK_em_speciated_VOC_openburning", - "NMVOC_Higher_Alkenes_speciated_VOC_openburning_share", - "NMVOC_Higher_Alkenes_em_speciated_VOC_openburning", - "NMVOC_Higher_Alkanes_speciated_VOC_openburning_share", - 
"NMVOC_Higher_Alkanes_em_speciated_VOC_openburning", - "NMVOC_HOCH2CHO_speciated_VOC_openburning_share", - "NMVOC_HOCH2CHO_em_speciated_VOC_openburning", - "NMVOC_HCOOH_speciated_VOC_openburning_share", - "NMVOC_HCOOH_em_speciated_VOC_openburning", - "NMVOC_HCN_speciated_VOC_openburning_share", - "NMVOC_HCN_em_speciated_VOC_openburning", - "NMVOC_CH3OH_speciated_VOC_openburning_share", - "NMVOC_CH3OH_em_speciated_VOC_openburning", - "NMVOC_CH3COOH_speciated_VOC_openburning_share", - "NMVOC_CH3COOH_em_speciated_VOC_openburning", - "NMVOC_CH3COCHO_speciated_VOC_openburning_share", - "NMVOC_CH3COCHO_em_speciated_VOC_openburning", - "NMVOC_CH2O_speciated_VOC_openburning_share", - "NMVOC_CH2O_em_speciated_VOC_openburning", - "NMVOC_C8H10_speciated_VOC_openburning_share", - "NMVOC_C8H10_em_speciated_VOC_openburning", - "NMVOC_C7H8_speciated_VOC_openburning_share", - "NMVOC_C7H8_em_speciated_VOC_openburning", - "NMVOC_C6H6_speciated_VOC_openburning_share", - "NMVOC_C6H6_em_speciated_VOC_openburning", - "NMVOC_C5H8_speciated_VOC_openburning_share", - "NMVOC_C5H8_em_speciated_VOC_openburning", - "NMVOC_C3H8_speciated_VOC_openburning_share", - "NMVOC_C3H8_em_speciated_VOC_openburning", - "NMVOC_C3H6_speciated_VOC_openburning_share", - "NMVOC_C3H6_em_speciated_VOC_openburning", - "NMVOC_C3H6O_speciated_VOC_openburning_share", - "NMVOC_C3H6O_em_speciated_VOC_openburning", - "NMVOC_C2H6_speciated_VOC_openburning_share", - "NMVOC_C2H6_em_speciated_VOC_openburning", - "NMVOC_C2H6S_speciated_VOC_openburning_share", - "NMVOC_C2H6S_em_speciated_VOC_openburning", - "NMVOC_C2H5OH_speciated_VOC_openburning_share", - "NMVOC_C2H5OH_em_speciated_VOC_openburning", - "NMVOC_C2H4_speciated_VOC_openburning_share", - "NMVOC_C2H4_em_speciated_VOC_openburning", - "NMVOC_C2H4O_speciated_VOC_openburning_share", - "NMVOC_C2H4O_em_speciated_VOC_openburning", - "NMVOC_C2H2_speciated_VOC_openburning_share", - "NMVOC_C2H2_em_speciated_VOC_openburning", - "NMVOC_C10H16_speciated_VOC_openburning_share", 
- "NMVOC_C10H16_em_speciated_VOC_openburning", - "NMVOC", - "NH3_openburning_share", - "NH3_em_openburning", - "NH3_em_anthro", - "NH3_em_SOLID_BIOFUEL_anthro", - "NH3_em_AIR_anthro", - "NH3", - "N2O", - "MEK", - "IND", - "Higher_Alkenes", - "Higher_Alkanes", - "HOCH2CHO", - "HCOOH", - "HCN", - "H2_openburning_share", - "H2_em_openburning", - "H2SO4_mass", - "H2", - "ENE", - "Delta14co2_in_air", - "CO_openburning_share", - "CO_em_openburning", - "CO_em_anthro", - "CO_em_SOLID_BIOFUEL_anthro", - "CO_em_AIR_anthro", - "CO2_em_anthro", - "CO2_em_AIR_anthro", - "CO2", - "CO", - "CH4_openburning_share", - "CH4_em_openburning", - "CH4_em_anthro", - "CH4_em_SOLID_BIOFUEL_anthro", - "CH4_em_AIR_anthro", - "CH4", - "CH3OH", - "CH3COOH", - "CH3COCHO", - "CH2O", - "C8H10", - "C7H8", - "C6H6", - "C5H8", - "C3H8", - "C3H6O", - "C3H6", - "C2H6S", - "C2H6", - "C2H5OH", - "C2H4O", - "C2H4", - "C2H2", - "C10H16", - "BC_openburning_share", - "BC_em_openburning", - "BC_em_anthro", - "BC_em_SOLID_BIOFUEL_anthro", - "BC_em_AIR_anthro", - "BC", - "AIR", - "AGR", - ], -} - -SUPPORTED_EXPERIMENTS = [ - "ssp585", - "ssp370-lowNTCF", - "ssp370", - "ssp245", - "ssp126", - "piControl", - "piClim-spAer-anthro", - "piClim-spAer-aer", - "piClim-lu", - "piClim-histnat", - "piClim-histghg", - "piClim-histall", - "piClim-histaer", - "piClim-ghg", - "piClim-control", - "piClim-anthro", - "piClim-aer", - "piClim-N2O", - "piClim-CH4", - "piClim-4xCO2", - "piClim-2xss", - "piClim-2xdust", - "piClim-2xVOC", - "piClim-2xDMS", - "pdSST-piArcSIC", - "pdSST-pdSIC", - "pdSST-futArcSIC", - "midHolocene", - "lig127k", - "historical", - "histSST-piNTCF", - "histSST-piAer", - "histSST", - "hist-spAer-all", - "hist-piNTCF", - "hist-piAer", - "hist-nat", - "hist-aer", - "hist-GHG", - "amip", -] -# filepath to var to res Mapping -VAR_RES_MAPPING_PATH = "/home/charlie/Documents/MILA/causalpaca/data/data_description/mappings/variableid2tableid.csv" - - -GRIDDING_HIERACHY = ["gn"] - -# skip subhr because only 
diagnostics for specific places -REMOVE_RESOLUTONS = ["suhbr"] # resolution endings to remove e.g. kick CFsubhr if this contains 'subhr' - - -RES_TO_CHUNKSIZE = {"year": 1, "mon": 12, "6hr": 1460, "3hr": 2920, "day": 364} diff --git a/climateset/download/constants/esm_constants.py b/climateset/download/constants/esm_constants.py deleted file mode 100644 index 780fbbe..0000000 --- a/climateset/download/constants/esm_constants.py +++ /dev/null @@ -1,82 +0,0 @@ -### DOWNLOADER PARAMS ########################################################## - -# these resolutions are stored in RESOLUTION -TEMP_RES = 0 -VERT_RES = 0 -LON_RES = 0 -LAT_RES = 0 - -# resolution of the end-data-product -RESOLUTION = (TEMP_RES, VERT_RES, LON_RES, LAT_RES) - -# list of years that are considered for the data -YEARS = [0] - - -# distinction not necessary for the mother as we are first just providing data not designing the loader yet, and a -# lookup table to check where to downloda what from anyway -# # variables used as input for the climate model -# IN_VARS = [] -# -# # predicted / target variables of the climate model -# OUT_VARS = [] -# # suggestion charlie -# VARS = ["nan"] -# # Julia: Birth has three steps: downloading, preprocessing, creating the different resolutions -# # and we already need to distinct between in_vars and out_vars for that - - -CO2 = ["CO2", "CO2_em_anthro", "CO2_em_openburning", "CO2_em_AIR_anthro"] -BC = ["BC", "BC_em_anthro", "BC_em_openburning", "BC_em_AIR_anthro"] -CH4 = ["CH4", "CH4_em_anthro", "CH4_em_openburning", "CH4_em_AIR_anthro"] -SO2 = ["SO2", "SO2_em_anthro", "SO2_em_openburning", "SO2_em_AIR_anthro"] - -IN_VARS = CO2 + BC + CH4 + SO2 -OUT_VARS = ["pr", "tas"] - -VARS = IN_VARS + OUT_VARS - -# scenarios -SCENARIOS = ["historical", "ssp126", "ssp245", "ssp370", "ssp585"] -ADDITIONAL_SCENARIOS = ["hist-aer", "hist-GHG", "piControl", "ssp370-lowNTCF"] - -# model -MODELS = ["nan"] - -# number of ensemble members to be considered -NUM_ENSEMBLE = 1 - -# which 
type of grid -GRID = "grid" - -### RAW PROCESSER PARAMS ####################################################### -# you will see after downloading - -### RESOLUTION PROCESSER PARAMS ################################################ - -# THIS must be moved somewhere else, because it's not static -# tuple of "means" of preprocesser for each variable, e.g. -# [("CO2", "mean"), ["CH4", "median"] -CHOSEN_AGGREGATIONS = [ - "MeanAggregation", - "MinAggregation", - "MaxAggregation", - "InstAggregation", -] -# TODO communicate to other persons which data structure etc you use here -CHOSEN_INTERPOLATIONS = {"nan"} -# TODO create a fixed list for all vars: which aggregation and interpolation - -### ALL PARAMS IN DICT ######################################################### -CORE_PARAMS = { - "models": MODELS, - "scenarios": SCENARIOS, - "years": YEARS, - "in_vars": IN_VARS, - "out_vars": OUT_VARS, - "vars": VARS, - "resolutions": RESOLUTION, - "grid": GRID, - "aggregations": CHOSEN_AGGREGATIONS, - "interpolations": CHOSEN_INTERPOLATIONS, -} diff --git a/climateset/download/constants/input4mips.py b/climateset/download/constants/input4mips.py new file mode 100644 index 0000000..7c6643a --- /dev/null +++ b/climateset/download/constants/input4mips.py @@ -0,0 +1,45 @@ +# TODO add VAR_SOURCE_LOOKUP with raw variables +# TODO add supported experiments +# TODO do we really need emission endings, meta_endings_prc, meta_endings_shar?? how is this used so far? +# pylint: disable=C0103 +from dataclasses import dataclass +from typing import Final + +from climateset.utils import get_yaml_config + + +@dataclass(frozen=True) +class Input4MIPSConstants: + """ + Data class to represent Input4MIPS constants that are used by the download module. 
+ + Attributes: + NODE_LINK : Node link is used to run an ESGF search + EMISSION_ENDINGS : File endings for emission variables + META_ENDINGS_PRC : File endings for PRC meta variables + META_ENDINGS_SHARE : File endings for SHARE meta variables + VAR_SOURCE_LOOKUP : Model and raw variables + """ + + NODE_LINK: Final[str] + EMISSIONS_ENDINGS: Final[tuple[str, ...]] + META_ENDINGS_PRC: Final[tuple[str, ...]] + META_ENDINGS_SHAR: Final[tuple[str, ...]] + MIP_ERA: Final[str] + TARGET_MIP: Final[str] + SUPPORTED_EXPERIMENTS: Final[tuple[str, ...]] + VAR_SOURCE_LOOKUP: Final[tuple[str, ...]] + + +_data = get_yaml_config("downloader/constants/imput4MIPs.yaml") + +INPUT4MIPS_CONSTANTS = Input4MIPSConstants( + NODE_LINK=_data["node_link"], + EMISSIONS_ENDINGS=tuple(_data["emissions_endings"]), + META_ENDINGS_PRC=tuple(_data["meta_endings_prc"]), + META_ENDINGS_SHAR=tuple(_data["meta_endings_shar"]), + MIP_ERA=_data["mip_era"], + TARGET_MIP=_data["target_mip"], + SUPPORTED_EXPERIMENTS=tuple(_data["supported_experiments"]), + VAR_SOURCE_LOOKUP=tuple(_data["var_source_lookup"]), +) diff --git a/climateset/download/constants/omip.py b/climateset/download/constants/omip.py new file mode 100644 index 0000000..abcdb10 --- /dev/null +++ b/climateset/download/constants/omip.py @@ -0,0 +1,22 @@ +class OmipConstants: + """ + Attributes: + NODE_LINK (str): Where the data can be accessed + MODEL_SOURCES (List): Identifiers for supported climate models + VAR_SOURCE_LOOKUP (Dict>): model and raw variables + SUPPORTED_EXPERIMENTS (list): experiments of climate models (runs) that are supported + """ + + NODE_LINK = "http://esgf-data2.llnl.gov" + + MODEL_SOURCES = [ + "NorESM2-LM", + ] + + VAR_SOURCE_LOOKUP = [ + "omldamax", + ] + + SUPPORTED_EXPERIMENTS = [ + "omip1", + ] diff --git a/climateset/download/downloader.py b/climateset/download/downloader.py index 8ddd16f..0d965ac 100644 --- a/climateset/download/downloader.py +++ b/climateset/download/downloader.py @@ -1,485 +1,42 @@ import 
logging import pathlib -from typing import Union -import pandas as pd -from pyesgf.search import SearchConnection - -from climateset import RAW_DATA -from climateset.download.constants.data_constants import ( - EMISSIONS_ENDINGS, - META_ENDINGS_PRC, - META_ENDINGS_SHAR, -) -from climateset.download.constants.esgf_server import ( - MODEL_SOURCES, - SUPPORTED_EXPERIMENTS, - VAR_SOURCE_LOOKUP, -) -from climateset.download.utils import ( - _handle_base_search_constraints, - download_metadata_variable, - download_model_variable, - download_raw_input_variable, - get_max_ensemble_member_number, - get_select_model_scenarios, - get_upload_version, -) -from climateset.utils import create_logger, get_keys_from_value, get_yaml_config +from climateset.download.cmip6_downloader import cmip6_download_from_config +from climateset.download.constants.esgf import CMIP6, INPUT4MIPS +from climateset.download.downloader_config import AVAILABLE_CONFIGS +from climateset.download.input4mips_downloader import input4mips_download_from_config +from climateset.download.utils import match_key_in_list +from climateset.utils import create_logger, get_yaml_config LOGGER = create_logger(__name__) -class Downloader: - """ - Class handling the downloading of the data. - - It communicates with the esgf nodes to search and download the specified data. 
- """ - - # TODO Fix complexity issue - def __init__( # noqa: C901 - self, - model: Union[str, None] = "NorESM2-LM", # default as in ClimateBench - experiments: list[str] = None, # sub-selection of ClimateBench default - variables: list[str] = None, - data_dir: str = RAW_DATA, - max_ensemble_members: int = 10, # if -1 take all - ensemble_members: list[str] = None, # preferred ensemble members used, if None not considered - overwrite: bool = False, # flag if files should be overwritten - download_biomassburning: bool = True, # get biomassburning data for input4mips - download_metafiles: bool = True, # get input4mips meta files - use_plain_emission_vars: bool = True, # specifies if plain variables are given and rest is inferred - logger: logging.Logger = LOGGER, - ): - """ - Init method for the Downloader. - - Args: - model: Model ID from which output should be downloaded. A list of all supported model ids can - be found in parameters.constants.MODEL_SOURCES. Model data only. - experiments: List of simulations from which data should be downloaded. Model data only. - experiments: List of variables for which data should be downloaded. Both model and raw data. - data_dir: Relative or absolute path to the directory where data should be stored. Will be created - if not yet existent. - meta_dir: Relative or absolute path to the directory where the metadata should be sored. Will be - created if not yet existent. - overwrite: Flag if files should be overwritten, if they already exist. - download_biomassburning: Flag if biomassburning data for input4mips variables should be downloaded. - download_metafiles: Flag if metafiles for input4mips variables should be downloaded. 
- """ - # Args init - self.logger = logger - self.model: str = model - self.model_node_link: str = "" - self.model_source_center: str = "" - if experiments is None: - experiments = [ - "historical", - "ssp370", - "hist-GHG", - "piControl", - "ssp434", - "ssp126", - ] - # TODO: have a list of supported experiments before trying to look for them on the node - # to reduce computation cost - self.experiments: list[str] = experiments - self.raw_vars: list[str] = [] - self.model_vars: list[str] = [] - self.biomass_vars: list[str] = [] - self.meta_vars_percentage: list[str] = [] - self.meta_vars_share: list[str] = [] - self.data_dir: Union[str, pathlib.Path] = data_dir - self.ensemble_members: list[str] = ensemble_members - self.max_ensemble_members: int = max_ensemble_members - self.overwrite: bool = overwrite - self.download_metafiles: bool = download_metafiles - self.download_biomass_burning: bool = download_biomassburning - self.use_plain_emission_vars: bool = use_plain_emission_vars - - # Args processing - selected_scenarios = get_select_model_scenarios() - self._hande_max_possible_member_number( - df_model_source=selected_scenarios, max_ensemble_members=max_ensemble_members - ) - self._handle_variables( - variables=variables, - ) - self._handle_model_params() - - # - # Internal helper functions for class init - # - def _hande_max_possible_member_number(self, df_model_source: pd.DataFrame, max_ensemble_members: int): - max_possible_member_number = get_max_ensemble_member_number( - df_model_source=df_model_source, experiments=self.experiments, model=self.model - ) - if max_ensemble_members == -1: - self.logger.info("Trying to take all ensemble members available.") - self.max_ensemble_members = max_possible_member_number - # verify that we have enough members for wanted experiments - # else choose the smallest available for all - if max_ensemble_members > max_possible_member_number: - self.logger.info("Not enough members available. 
Choosing smallest maximum.") - self.max_ensemble_members = max_possible_member_number - self.logger.info(f"Downloading data for {self.max_ensemble_members} members.") - - def _handle_variables(self, variables: list[str]): - self._generate_variables(variables=variables) - self._generate_plain_emission_vars() - self.logger.info(f"Raw variables to download: {self.raw_vars}") - self.logger.info(f"Model predicted vars to download: {self.model_vars}") - if self.download_biomass_burning: - self.logger.info(f"Download biomass burning vars: {self.biomass_vars}") - if self.download_metafiles: - self.logger.info(f"Downloading meta vars:\n\t{self.meta_vars_percentage}\n\t{self.meta_vars_share}") - - def _handle_model_params(self): - try: - self.model_node_link = MODEL_SOURCES[self.model]["node_link"] - self.model_source_center = MODEL_SOURCES[self.model]["center"] - except KeyError: - self.model = next(iter(MODEL_SOURCES)) - if self.model is not None: - self.logger.info(f"WARNING: Model {self.model} unknown. 
Using default instead.") - self.logger.info(f"Using : {self.model}") - self.model_node_link = MODEL_SOURCES[self.model]["node_link"] - self.model_source_center = MODEL_SOURCES[self.model]["center"] - - def _generate_plain_emission_vars(self): - if self.use_plain_emission_vars: - # plain vars are biomass vars - self.biomass_vars = self.raw_vars - self.meta_vars_percentage = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in META_ENDINGS_PRC - ] - self.meta_vars_share = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in META_ENDINGS_SHAR - ] - - self.raw_vars = [ - variable + emission_ending for variable in self.raw_vars for emission_ending in EMISSIONS_ENDINGS - ] - # be careful with CO2 - if "CO2_em_openburning" in self.raw_vars: - self.raw_vars.remove("CO2_em_openburning") - else: - # get plain input4mips vars = biomass vars for historical - self.biomass_vars = list({v.split("_")[0] for v in self.raw_vars}) - # remove biomass vars from normal raw vars list - for b in self.biomass_vars: - try: - self.raw_vars.remove(b) - except Exception as error: - self.logger.warning(f"Caught the following exception but continuing : {error}") - - self.meta_vars_percentage = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in META_ENDINGS_PRC - ] - self.meta_vars_share = [ - biomass_var + ending - for biomass_var in self.biomass_vars - if biomass_var != "CO2" - for ending in META_ENDINGS_SHAR - ] - - def _generate_variables(self, variables: list[str]): - if variables is None: - variables = ["tas", "pr", "SO2_em_anthro", "BC_em_anthro"] - variables = [v.replace(" ", "_").replace("-", "_") for v in variables] - self.logger.info(f"Cleaned variables : {variables}") - for v in variables: - t = get_keys_from_value(d=VAR_SOURCE_LOOKUP, val=v, logger=self.logger) - if t == "model": - self.model_vars.append(v) - elif t == "raw": 
- self.raw_vars.append(v) - - else: - self.logger.info(f"WARNING: unknown source type for var {v}. Not supported. Skipping.") - - # - # Class functions - # - def download_from_model_single_var( # noqa: C901 - self, - variable: str, - experiment: str, - project: str = "CMIP6", - default_frequency: str = "mon", - preferred_version: str = "latest", - default_grid_label: str = "gn", - ): - """ - Function handling the download of a single variable-experiment pair that is associated with a model's output - (CMIP data). - - Args: - variable: variable ID - experiment: experiment ID - project: umbrella project id e.g. CMIPx - default_frequency: default frequency to download - preferred_version: data upload version, if 'latest', the newest version will get selected always - default_grid_label: default gridding method in which the data is provided - """ - conn = SearchConnection(url=self.model_node_link, distrib=False) - - facets = ( - "project,experiment_id,source_id,variable,frequency,variant_label,variable, nominal_resolution, " - "version, grid_label, experiment_id" - ) - - self.logger.info("Using download_from_model_single_var() function") - - ctx = conn.new_context( - project=project, - experiment_id=experiment, - source_id=self.model, - variable=variable, - facets=facets, - ) - - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - variants = list(ctx.facet_counts["variant_label"]) - - self.logger.info(f"Available variants : {variants}\n") - self.logger.info(f"Length : {len(variants)}") - - # TODO refactor logic of if/else - if not self.ensemble_members: - if self.max_ensemble_members > len(variants): - self.logger.info("Less ensemble members available than maximum number desired. Including all variants.") - ensemble_member_final_list = variants - else: - self.logger.info( - f"{len(variants)} ensemble members available than desired (max {self.max_ensemble_members}. " - f"Choosing only the first {self.max_ensemble_members}.)." 
- ) - ensemble_member_final_list = variants[: self.max_ensemble_members] - else: - self.logger.info(f"Desired list of ensemble members given: {self.ensemble_members}") - ensemble_member_final_list = list(set(variants) & set(self.ensemble_members)) - if len(ensemble_member_final_list) == 0: - self.logger.info("WARNING: no overlap between available and desired ensemble members!") - self.logger.info("Skipping.") - return None - - for ensemble_member in ensemble_member_final_list: - self.logger.info(f"Ensembles member: {ensemble_member}") - ctx_ensemble = ctx.constrain(variant_label=ensemble_member) - - version = get_upload_version(context=ctx, preferred_version=preferred_version) - if version: - ctx_ensemble = ctx_ensemble.constrain(version=version) - - results = ctx_ensemble.search() - - self.logger.info(f"Result len {len(results)}") - - download_model_variable( - model_id=self.model, search_results=results, variable=variable, base_path=self.data_dir - ) - - def download_raw_input_single_var( # noqa: C901 - self, - variable: str, - project: str = "input4mips", - institution_id: str = "PNNL-JGCRI", - default_frequency: str = "mon", - preferred_version: str = "latest", - default_grid_label: str = "gn", - ): - """ - Function handling the download of all input4mips data associated with a single variable. 
- - Args: - variable: variable ID - project: umbrella project, here "input4mips" - institution_id: id of the institution that provides the data - default_frequency: default frequency to download - preferred_version: data upload version, if 'latest', the newest version will get selected always - default_grid_label: default gridding method in which the data is provided - """ - self.logger.info("Using download_raw_input_single_var() function") - - facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" - conn = SearchConnection(url=self.model_node_link, distrib=False) - - ctx = conn.new_context( - project=project, - variable=variable, - institution_id=institution_id, - facets=facets, - ) - - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - mips_targets = list(ctx.facet_counts["target_mip"]) - self.logger.info(f"Available target mips: {mips_targets}") - - for target in mips_targets: - ctx_target = ctx.constrain(target_mip=target) - version = get_upload_version(context=ctx_target, preferred_version=preferred_version) - if version: - ctx_target = ctx_target.constrain(version=version) - - results = ctx_target.search() - self.logger.info(f"Result len {len(results)}") - if len(results) > 0: - download_raw_input_variable( - institution_id=institution_id, search_results=results, variable=variable, base_path=self.data_dir - ) - - def download_meta_historic_biomassburning_single_var( - self, - variable: str, - institution_id: str, - project: str = "input4mips", - default_grid_label: str = "gn", - default_frequency: str = "mon", - preferred_version: str = "latest", - ): - """ - Function handling the download of all metadata associated with a single input4mips variable. 
- - Args: - variable: variable ID - project: umbrella project - institution_id: id of the institution that provides the data - default_grid_label: default gridding method in which the data is provided - default_frequency: default frequency to download - preferred_version: data upload version, if 'latest', the newest version will get selected always - """ - variable_id = variable.replace("_", "-") - variable_search = f"percentage_{variable_id.replace('-', '_').split('_')[-1]}" - self.logger.info(variable, variable_id, institution_id) - conn = SearchConnection(url=self.model_node_link, distrib=False) - facets = "nominal_resolution,version" - ctx = conn.new_context( - project=project, - variable=variable_search, - variable_id=variable_id, - institution_id=institution_id, - target_mip="CMIP", - facets=facets, - ) - - ctx = _handle_base_search_constraints(ctx, default_frequency, default_grid_label) - - version = get_upload_version(context=ctx, preferred_version=preferred_version) - if version: - ctx = ctx.constrain(version=version) - - results = ctx.search() - self.logger.info(f"Result len {len(results)}") - - result_list = [r.file_context().search() for r in results] - self.logger.info(f"List of results :\n{result_list}") - - download_metadata_variable( - institution_id=institution_id, search_results=results, variable=variable, base_path=self.data_dir - ) - - def download_from_model(self): - """ - Function handling the download of all variables that are associated with a model's output. - - Searches for all files associated with the respected variables and experiment that the downloader - was initialized with. - - A search connection is established and the search is iteratively constraint to meet all specifications. - Data is downloaded and stored in a separate file for each year. The default format is netCDF4. 
- - Resulting hierarchy: - - `CMIPx/model_id/ensemble_member/experiment/variable/nominal_resolution/frequency/year.nc` - - If the constraints cannot be met, per default behaviour for the downloader to select first other - available value - """ - - for variable in self.model_vars: - self.logger.info(f"Downloading data for variable: {variable}") - for experiment in self.experiments: - if experiment in SUPPORTED_EXPERIMENTS: - self.logger.info(f"Downloading data for experiment: {experiment}") - self.download_from_model_single_var(variable=variable, experiment=experiment) - else: - self.logger.info( - f"Chosen experiment {experiment} not supported. All supported experiments: " - f"{SUPPORTED_EXPERIMENTS}. Skipping." - ) - - def download_raw_input(self): - """ - Function handling the download of all variables that are associated with a model's input (input4mips). - - Searches for all files associated with the respected variables that the downloader was initialized with. - A search connection is established and the search is iteratively constraint to meet all specifications. - Data is downloaded and stored in a separate file for each year. The default format is netCDF4. - - Resulting hierarchy: - - `input4mips/experiment/variable/nominal_resolution/frequency/year.nc` - - If the constraints cannot be met, the default behaviour for the downloader is to select first other - available value. 
- """ - for variable in self.raw_vars: - if variable.endswith("openburning"): - institution_id = "IAMC" - else: - institution_id = "PNNL-JGCRI" - self.logger.info(f"Downloading data for variable: {variable}") - self.download_raw_input_single_var(variable=variable, institution_id=institution_id) - - if self.download_biomass_burning & ("historical" in self.experiments): - for variable in self.biomass_vars: - self.logger.info(f"Downloading biomassburing data for variable: {variable}") - self.download_raw_input_single_var(variable=variable, institution_id="VUA") - - if self.download_metafiles: - for variable in self.meta_vars_percentage: - # percentage are historic and have no scenarios - self.logger.info(f"Downloading meta percentage data for variable: {variable}") - self.download_meta_historic_biomassburning_single_var(variable=variable, institution_id="VUA") - for variable in self.meta_vars_share: - self.logger.info(f"Downloading meta openburning share data for variable: {variable}") - self.download_raw_input_single_var(variable=variable, institution_id="IAMC") - - -def download_from_config_file(config: str, logger: logging.Logger = LOGGER): +def download_from_config_file(config_file: str | pathlib.Path, logger: logging.Logger = LOGGER): """ This function downloads variables automatically from input config file Args: - config: Can be a dictionary containing configurations or a path to a configuration yaml file + config_file: Path to a configuration yaml file logger: Logging instance """ - if not isinstance(config, dict): - if isinstance(config, str): - config = pathlib.Path(config) - config = get_yaml_config(config) - try: - models = config["models"] - except KeyError as e: - logger.warning(f"Caught the following exception but continuing : {e}") - logger.info("No climate models specified. 
Assuming only input4mips data should be downloaded.") - models = [None] - downloader_kwargs = config["downloader_kwargs"] - logger.info(f"Downloader kwargs : {downloader_kwargs}") - for m in models: - downloader = Downloader(model=m, **downloader_kwargs, logger=logger) - downloader.download_raw_input() - if m is not None: - downloader.download_from_model() + if isinstance(config_file, str): + config_file = pathlib.Path(config_file) + config_dict = get_yaml_config(config_file) + + downloader_factory = { + INPUT4MIPS: input4mips_download_from_config, + CMIP6: cmip6_download_from_config, + } + + verified_config_keys = [] + for config_key in config_dict: + verified_key = match_key_in_list(input_key=config_key, key_list=AVAILABLE_CONFIGS) + if verified_key: + verified_config_keys.append(verified_key) + else: + logger.error( + f"Input project [{config_key}] from [{config_file}]was not found in available projects. " + "Removing it from download list" + ) + + for config_key in verified_config_keys: + downloader_factory[config_key](config=config_file) diff --git a/climateset/download/downloader_config.py b/climateset/download/downloader_config.py new file mode 100644 index 0000000..4d3eca9 --- /dev/null +++ b/climateset/download/downloader_config.py @@ -0,0 +1,264 @@ +import copy +import inspect +import logging +from pathlib import Path + +import yaml + +from climateset import CONFIGS, RAW_DATA +from climateset.download.constants.esgf import ( + CMIP6, + ESGF_PROJECTS, + ESGF_PROJECTS_CONSTANTS, + INPUT4MIPS, +) +from climateset.download.utils import match_key_in_list +from climateset.utils import create_logger, get_yaml_config + +LOGGER = create_logger(__name__) + +AVAILABLE_CONFIGS = frozenset([CMIP6, INPUT4MIPS]) + + +class BaseDownloaderConfig: + def __init__( + self, + project: str, + data_dir: str | Path = RAW_DATA, + experiments: list[str] = None, + variables: list[str] = None, + overwrite: bool = False, + logger: logging.Logger = LOGGER, + ): + self.logger = logger 
+ + self.project = "" + uppercase_project = project.upper() + for p in ESGF_PROJECTS: + if p.upper() == uppercase_project: + self.project = p + + if self.project not in ESGF_PROJECTS: + self.logger.error(f"Project {self.project} has not been implemented in the Downloader yet.") + raise ValueError( + f"Project {self.project} is not recognized. Consider adding a constant class in download/constants and " + f"the esgf.py file." + ) + + if isinstance(data_dir, str): + data_dir = Path(data_dir) + self.data_dir = data_dir + + self.experiments = experiments + self.variables = variables + self.overwrite = overwrite + + # init shared constants + self.proj_constants = ESGF_PROJECTS_CONSTANTS[self.project] + self.node_link = self.proj_constants.NODE_LINK + self.avail_variables = self.proj_constants.VAR_SOURCE_LOOKUP + self.avail_experiments = self.proj_constants.SUPPORTED_EXPERIMENTS + self.config_is_valid = True + + self._validate_item_list( + item_list=self.variables, available_items=self.avail_variables, name_of_item="variable" + ) + self._validate_item_list( + item_list=self.experiments, available_items=self.avail_experiments, name_of_item="experiment" + ) + + def _validate_item_list(self, item_list: list[str], available_items: list[str], name_of_item: str) -> None: + """ + This small function checks that the given items (variables, models, experiments, etc.) are valid for their given + project (Input4MIPs, CMIP6, etc.). + + Also remove unvalid items from the list of items as to not. + + Args: + item_list: List of items to check (like self.variables, self.experiments, etc.) + available_items: List of available items against which to check (like self.avail_variables, etc.) + name_of_item: Name of item to check. Write lowercase and singular: ie. variable, experiment, etc. 
+ + Returns: + None + """ + error_in_item_list = False + for e in item_list: + if e not in available_items: + self.logger.error(f"{name_of_item.capitalize()} [{e}] not supported.") + item_list.remove(e) + error_in_item_list = True + if error_in_item_list: + self.logger.error(f"Some, or all submitted {name_of_item}s were not found found - Please verify") + self.logger.error(f"Available {name_of_item}s: {available_items}") + self.logger.warning(f"List of valid submitted {name_of_item}s: {available_items}") + self.config_is_valid = False + + @staticmethod + def _handle_yaml_config_path(config_file_name, config_path): + if isinstance(config_path, str): + config_path = Path(config_path) + if not config_file_name.endswith(".yaml"): + config_file_name = f"{config_file_name}.yaml" + config_full_path = config_path / config_file_name + return config_full_path + + def generate_config_dict(self): + init_params = inspect.signature(self.__init__).parameters + init_args = set(init_params.keys()) - {"self"} + config_dict = {self.project: {}} + for key, value in self.__dict__.items(): + if key in init_args and key not in ["project", "logger"] and not callable(value): + config_dict[self.project][key] = value + return config_dict + + def generate_config_file(self, config_file_name: str, config_path: str | Path = CONFIGS) -> None: + config_full_path = self._handle_yaml_config_path(config_file_name, config_path) + data = self.generate_config_dict() + with open(config_full_path, "w", encoding="utf-8") as config_file: + yaml.dump(data, config_file, indent=2) + + def add_to_config_file(self, config_file_name: str, config_path: str | Path = CONFIGS) -> None: + config_full_path = self._handle_yaml_config_path(config_file_name, config_path) + existing_config = {} + if config_full_path.exists(): + existing_config = get_yaml_config(config_full_path) + existing_config.update(existing_config) + new_config = self.generate_config_dict() + existing_config.update(new_config) + with 
open(config_full_path, "w", encoding="utf-8") as config_file: + yaml.dump(existing_config, config_file, indent=2) + + +class Input4mipsDownloaderConfig(BaseDownloaderConfig): + def __init__( + self, + project: str, + data_dir: str = RAW_DATA, + experiments: list[str] = None, + variables: list[str] = None, + download_biomassburning: bool = True, # get biomassburning data for input4mips + download_metafiles: bool = True, # get input4mips meta files + use_plain_emission_vars: bool = True, + overwrite: bool = False, + logger: logging.Logger = LOGGER, + ): + super().__init__(project, data_dir, experiments, variables, overwrite, logger) + + self.download_metafiles: bool = download_metafiles # TODO infer automatically from vars + self.download_biomass_burning: bool = download_biomassburning # TODO infer automatically from vars + self.use_plain_emission_vars: bool = use_plain_emission_vars + self.emissions_endings = self.proj_constants.EMISSIONS_ENDINGS + self.meta_endings_prc = self.proj_constants.META_ENDINGS_PRC + self.meta_endings_share = self.proj_constants.META_ENDINGS_SHAR + self.mip_area = self.proj_constants.MIP_ERA + self.target_mip = self.proj_constants.TARGET_MIP + + # Attributes that are going to be retrieved / set within this class for + # (all) + # (climate model inputs) + self.biomass_vars: list[str] = [] + self.meta_vars_percentage: list[str] = [] + self.meta_vars_share: list[str] = [] + + self._handle_emission_variables() + + def _handle_emission_variables(self): + self._generate_raw_emission_vars() + self._generate_plain_emission_vars() + self.logger.info(f"Emission variables to download: {self.variables}") + if self.download_biomass_burning: + self.logger.info(f"Biomass burning vars to download: {self.biomass_vars}") + if self.download_metafiles: + self.logger.info( + f"Meta emission vars to download:\n\t{self.meta_vars_percentage}\n\t{self.meta_vars_share}" + ) + + def _generate_raw_emission_vars(self): + variables = copy.deepcopy(self.variables) + if 
variables is None: + raise ValueError("No variables have been given to the downloader. Variables must be given for downloader.") + self.variables = [v.replace(" ", "_").replace("-", "_") for v in variables] + self.logger.info(f"Cleaned variables : {self.variables}") + + def _generate_plain_emission_vars(self): + if self.use_plain_emission_vars: + # plain vars are biomass vars + self.biomass_vars = self.variables + self.variables = [ + variable + emission_ending for variable in self.variables for emission_ending in self.emissions_endings + ] + # be careful with CO2 + if "CO2_em_openburning" in self.variables: + self.variables.remove("CO2_em_openburning") + else: + # get plain input4mips vars = biomass vars for historical + self.biomass_vars = list({v.split("_")[0] for v in self.variables}) + # remove biomass vars from normal vars list + for b in self.biomass_vars: + try: + self.variables.remove(b) + except Exception as error: # pylint: disable=W0718 + self.logger.warning(f"Caught the following exception but continuing : {error}") + + self.meta_vars_percentage = [ + biomass_var + ending + for biomass_var in self.biomass_vars + if biomass_var != "CO2" + for ending in self.meta_endings_prc + ] + self.meta_vars_share = [ + biomass_var + ending + for biomass_var in self.biomass_vars + if biomass_var != "CO2" + for ending in self.meta_endings_share + ] + + +class CMIP6DownloaderConfig(BaseDownloaderConfig): + def __init__( + self, + project: str, + data_dir: str = RAW_DATA, + models: list[str] = None, + experiments: list[str] = None, + ensemble_members: list[str] = None, # preferred ensemble members used, if None not considered + max_ensemble_members: int = 10, # if -1 take all + variables: list[str] = None, + overwrite: bool = False, + logger: logging.Logger = LOGGER, + ): + super().__init__(project, data_dir, experiments, variables, overwrite, logger) + + if not models: + models = ["NorESM2-LM"] + if isinstance(models, str): + models = [models] + self.models: list[str] 
= models + self.avail_models = self.proj_constants.MODEL_SOURCES + self.ensemble_members: list[str] = ensemble_members + self.max_ensemble_members: int = max_ensemble_members + + self._validate_item_list(item_list=self.models, available_items=self.avail_models, name_of_item="model") + + +def _get_config_from_file(config_file, config_id, config_class, logger=LOGGER): + configs = get_yaml_config(config_file) + config_key = config_id + if config_key not in configs: + config_key = match_key_in_list(config_key, list(configs.keys())) + if not config_key: + logger.error(f"Config key [{config_id}] not found in config file [{config_file}]") + class_configs = configs[config_key] + config_object = config_class(project=config_id, **class_configs) + return config_object + + +def create_input4mips_downloader_config_from_file(config_file) -> Input4mipsDownloaderConfig: + config_object = _get_config_from_file(config_file, INPUT4MIPS, Input4mipsDownloaderConfig) + return config_object + + +def create_cmip6_downloader_config_from_file(config_file) -> CMIP6DownloaderConfig: + config_object = _get_config_from_file(config_file, CMIP6, CMIP6DownloaderConfig) + return config_object diff --git a/climateset/download/input4mips_downloader.py b/climateset/download/input4mips_downloader.py new file mode 100644 index 0000000..d2838fb --- /dev/null +++ b/climateset/download/input4mips_downloader.py @@ -0,0 +1,121 @@ +from climateset.download.abstract_downloader import AbstractDownloader +from climateset.download.constants.esgf import INPUT4MIPS +from climateset.download.downloader_config import ( + Input4mipsDownloaderConfig, + create_input4mips_downloader_config_from_file, +) +from climateset.download.utils import ( + search_and_download_esgf_biomass_single_var, + search_and_download_esgf_raw_single_var, +) +from climateset.utils import create_logger + +LOGGER = create_logger(__name__) + + +class Input4MipsDownloader(AbstractDownloader): + def __init__(self, config: 
def download(self):
    """
    Download every dataset requested by ``self.config``.

    Steps, in order:
      1. each entry of ``config.variables`` through ``download_raw_input_single_var``, using
         institution ``IAMC`` for variables ending in ``openburning`` and ``PNNL-JGCRI`` otherwise;
      2. if ``config.download_biomass_burning`` is truthy and ``"historical"`` is one of
         ``config.experiments``, each entry of ``config.biomass_vars`` from institution ``VUA``;
      3. if ``config.download_metafiles`` is truthy, each entry of ``config.meta_vars_percentage``
         through ``download_meta_historic_biomassburning_single_var`` (institution ``VUA``) and
         each entry of ``config.meta_vars_share`` through ``download_raw_input_single_var``
         (institution ``IAMC``).
    """
    for variable in self.config.variables:
        if variable.endswith("openburning"):
            institution_id = "IAMC"
        else:
            institution_id = "PNNL-JGCRI"
        self.logger.info(f"Downloading data for variable: {variable}")
        self.download_raw_input_single_var(variable=variable, institution_id=institution_id)

    # Logical `and` rather than bitwise `&`: same result for bool flags, and a non-bool flag
    # value (e.g. None) is simply evaluated for truthiness instead of raising TypeError.
    if self.config.download_biomass_burning and "historical" in self.config.experiments:
        for variable in self.config.biomass_vars:
            self.logger.info(f"Downloading biomassburing data for variable: {variable}")
            self.download_raw_input_single_var(variable=variable, institution_id="VUA")

    if self.config.download_metafiles:
        for variable in self.config.meta_vars_percentage:
            # percentage are historic and have no scenarios
            self.logger.info(f"Downloading meta percentage data for variable: {variable}")
            self.download_meta_historic_biomassburning_single_var(variable=variable, institution_id="VUA")
        for variable in self.config.meta_vars_share:
            self.logger.info(f"Downloading meta openburning share data for variable: {variable}")
            self.download_raw_input_single_var(variable=variable, institution_id="IAMC")
def download_meta_historic_biomassburning_single_var(
    self,
    variable: str,
    institution_id: str,
    project: str = INPUT4MIPS,
    default_grid_label: str = "gn",
    default_frequency: str = "mon",
    preferred_version: str = "latest",
):
    """
    Function handling the download of all metadata associated with a single input4mips variable.

    The search is delegated to ``search_and_download_esgf_biomass_single_var`` with
    ``variable_id`` set to ``variable`` with underscores replaced by hyphens, and ``variable``
    set to ``percentage_<last underscore-separated part of the variable>``. Files are stored
    under ``self.config.data_dir``.

    Args:
        variable: variable ID
        institution_id: id of the institution that provides the data
        project: umbrella project
        default_grid_label: default gridding method in which the data is provided
        default_frequency: default frequency to download
        preferred_version: data upload version, if 'latest', the newest version will get selected always
    """
    variable_id = variable.replace("_", "-")
    sector = variable_id.replace("-", "_").split("_")[-1]
    variable_search = f"percentage_{sector}"
    # Logger calls treat extra positional arguments as %-format arguments, so the values must
    # be embedded in the message itself to be logged.
    self.logger.info(f"{variable}, {variable_id}, {institution_id}")

    # Search context is sensitive to order and sequence, which is why
    # it's done in different steps instead of putting everything in `new_context`
    results = search_and_download_esgf_biomass_single_var(
        variable=variable_search,
        variable_id=variable_id,
        project=project,
        institution_id=institution_id,
        default_grid_label=default_grid_label,
        default_frequency=default_frequency,
        preferred_version=preferred_version,
        base_path=self.config.data_dir,
    )
    self.logger.info(f"Download results: {results}")
def download_raw_input_variable(project, institution_id, search_results, variable, base_path: str | Path = RAW_DATA):
    """
    Download the given search results for one raw input variable.

    Files are handed to ``_download_process`` with the target directory
    ``<base_path>/<project>/raw_input_vars/<institution_id>/<variable>``.

    Args:
        project: project name, first path component below ``base_path``
        institution_id: id of the institution that provides the data
        search_results: search results forwarded unchanged to ``_download_process``
        variable: variable name, last path component
        base_path: root directory, as a string or ``Path``
    """
    root_dir = Path(base_path) if isinstance(base_path, str) else base_path
    relative_dir = f"{project}/raw_input_vars/{institution_id}/{variable}"
    _download_process(root_dir / relative_dir, search_results)
def download_metadata_variable(project, institution_id, search_results, variable, base_path: str | Path = RAW_DATA):
    """
    Download the given search results for one metadata variable.

    Files are handed to ``_download_process`` with the target directory
    ``<base_path>/<project>/meta_vars/<institution_id>/<variable>``.

    Args:
        project: project name, first path component below ``base_path``
        institution_id: id of the institution that provides the data
        search_results: search results forwarded unchanged to ``_download_process``
        variable: variable name, last path component
        base_path: root directory, as a string or ``Path``
    """
    root_dir = Path(base_path) if isinstance(base_path, str) else base_path
    relative_dir = f"{project}/meta_vars/{institution_id}/{variable}"
    _download_process(root_dir / relative_dir, search_results)
= get_grid_label(context=ctx, default_grid_label=default_grid_label) if grid_label: ctx = ctx.constrain(grid_label=grid_label) @@ -275,19 +254,252 @@ def _handle_base_search_constraints(ctx, default_frequency, default_grid_label): return ctx -def get_select_model_scenarios(path_to_file: Union[str, pathlib.Path] = None) -> pd.DataFrame: - """ - This function returns a dataframe based on input Json file. +def handle_yaml_config_path(config_file_name, config_path): + if isinstance(config_path, str): + config_path = Path(config_path) + if not config_file_name.endswith(".yaml"): + config_file_name = f"{config_file_name}.yaml" + config_full_path = config_path / config_file_name + return config_full_path + + +def match_key_in_list(input_key: str, key_list: list[str]) -> str | None: + for key in key_list: + if input_key.lower() == key.lower(): + return key + if input_key.upper() == key.upper(): + return key + return None + + +def get_base_search_context( + url: str = None, + facets: str = None, + variable: str = None, + variable_id: str = None, + institution_id: str = None, + project: str = None, + experiment_id: str = None, + source_id: str = None, + default_grid_label: str = None, + default_frequency: str = None, +) -> DatasetSearchContext: + conn = SearchConnection(url=url, distrib=False) + ctx = conn.new_context( + project=project, + variable=variable, + variable_id=variable_id, + institution_id=institution_id, + experiment_id=experiment_id, + source_id=source_id, + facets=facets, + ) + ctx = handle_base_search_constraints(ctx, default_frequency, default_grid_label) + return ctx - Args: - path_to_file: Path to Json file - Returns: - Dataframe - """ - if not path_to_file: - path_to_file = APP_ROOT / "download/constants/selected_scenariosMIPs.json" - if isinstance(path_to_file, str): - path_to_file = pathlib.Path(path_to_file) - selected_scenarios = pd.read_json(path_to_file, orient="records") - return selected_scenarios +def search_and_download_esgf_raw_single_var( + 
variable: str, + institution_id: str, + project: str, + default_grid_label: str, + default_frequency: str, + preferred_version: str, + data_dir, + logger=LOGGER, +): + facets = "project,frequency,variable,nominal_resolution,version,target_mip,grid_label" + for url in NODE_LINK_URLS: + results_list = [] + try: + + ctx = get_base_search_context( + url=url, + project=project, + institution_id=institution_id, + variable=variable, + facets=facets, + default_grid_label=default_grid_label, + default_frequency=default_frequency, + ) + + mips_targets = list(ctx.facet_counts["target_mip"]) + logger.info(f"Available target mips: {mips_targets}") + + for target in mips_targets: + ctx_target = ctx.constrain(target_mip=target) + version = get_upload_version(context=ctx_target, preferred_version=preferred_version) + if version: + ctx_target = ctx_target.constrain(version=version) + + results = ctx_target.search() + logger.info(f"Result len {len(results)}") + if results: + results_list.append(results) + if results_list: + for r in results_list: + download_raw_input_variable( + project=project, + institution_id=institution_id, + search_results=r, + variable=variable, + base_path=data_dir, + ) + return results_list + logger.error(f"Could not find anything for {url}") + except Exception as e: + logger.error(f"Error: {e}") + + raise Exception(f"Could not find anything for all urls: {NODE_LINK_URLS}") + + +def search_and_download_esgf_biomass_single_var( + variable: str, + variable_id: str, + institution_id: str, + project: str, + default_grid_label: str, + default_frequency: str, + preferred_version: str, + base_path: Path, + logger=LOGGER, +): + facets = "nominal_resolution,version" + for url in NODE_LINK_URLS: + try: + ctx = get_base_search_context( + url=url, + facets=facets, + variable=variable, + variable_id=variable_id, + institution_id=institution_id, + project=project, + default_grid_label=default_grid_label, + default_frequency=default_frequency, + ) + + version = 
def _select_ensemble_members(variants, ensemble_members, max_ensemble_members, logger):
    """Choose which of the available ``variants`` to download, keeping the order of ``variants``."""
    if ensemble_members:
        logger.info(f"Desired list of ensemble members given: {ensemble_members}")
        wanted = set(ensemble_members)
        # Filter instead of intersecting two sets so the download order is reproducible
        return [variant for variant in variants if variant in wanted]
    if max_ensemble_members < 0:
        # A negative maximum means "no limit"; slicing with it would drop trailing variants
        logger.info("No maximum number of ensemble members set. Including all variants.")
        return variants
    if max_ensemble_members > len(variants):
        logger.info("Less ensemble members available than maximum number desired. Including all variants.")
        return variants
    logger.info(
        f"{len(variants)} ensemble members available than desired (max {max_ensemble_members}. "
        f"Choosing only the first {max_ensemble_members}.)."
    )
    return variants[:max_ensemble_members]


def search_and_download_esgf_model_single_var(
    model: str,
    variable: str,
    experiment: str,
    project: str,
    default_grid_label: str,
    default_frequency: str,
    preferred_version: str,
    max_ensemble_members: int,
    ensemble_members: list[str],
    base_path: Path,
    logger=LOGGER,
):
    """
    Search the ESGF nodes for one model/experiment/variable combination and download the results.

    The nodes in ``NODE_LINK_URLS`` are tried in order. On each node the available variant
    labels (ensemble members) are listed, a subset is selected, and each selected member is
    searched separately. As soon as one node yields results they are downloaded with
    ``download_model_variable`` and returned. Any exception raised while handling a node is
    logged and the next node is tried.

    Args:
        model: model (``source_id``) to search for
        variable: variable to search for
        experiment: experiment (``experiment_id``) to search for
        project: project name, used for the download path and in error messages
        default_grid_label: grid label forwarded to ``get_base_search_context``
        default_frequency: frequency forwarded to ``get_base_search_context``
        preferred_version: version forwarded to ``get_upload_version``
        max_ensemble_members: maximum number of members to download when ``ensemble_members``
            is empty; a negative value means no limit
        ensemble_members: preferred members; when given, only members that are both available
            and listed here are downloaded
        base_path: root directory for the downloaded files

    Returns:
        The list of per-member search results that were downloaded, or ``None`` when
        ``ensemble_members`` is given but none of them is available on the queried node.

    Raises:
        Exception: if no node produced any result.
    """
    facets = "project,experiment_id,source_id,variable,frequency,variant_label,nominal_resolution,version,grid_label"

    for url in NODE_LINK_URLS:
        results_list = []
        try:
            logger.info("Using download_from_model_single_var() function")

            # NOTE(review): unlike the input4mips search helpers, `project` is not passed as a
            # search constraint here — confirm whether that is intentional.
            ctx = get_base_search_context(
                url=url,
                facets=facets,
                variable=variable,
                experiment_id=experiment,
                source_id=model,
                default_frequency=default_frequency,
                default_grid_label=default_grid_label,
            )

            logger.info(ctx)

            variants = list(ctx.facet_counts["variant_label"])

            if len(variants) < 1:
                logger.info(
                    "No items were found for this request. Please check on the esgf server if the combination of your "
                    "model/scenarios/variables exists."
                )
                raise ValueError(
                    f"Downloader did not find any items on esgf for your request with: Project {project}, "
                    f"Experiment {experiment}, Model {model}, Variable {variable}."
                )

            logger.info(f"Available variants : {variants}\n")
            logger.info(f"Length : {len(variants)}")

            ensemble_member_final_list = _select_ensemble_members(
                variants, ensemble_members, max_ensemble_members, logger
            )
            if ensemble_members and not ensemble_member_final_list:
                logger.info("WARNING: no overlap between available and desired ensemble members!")
                logger.info("Skipping.")
                return None

            for ensemble_member in ensemble_member_final_list:
                logger.info(f"Ensembles member: {ensemble_member}")
                ctx_ensemble = ctx.constrain(variant_label=ensemble_member)
                logger.info(ctx_ensemble)

                # Resolve the version on the member-constrained context so the chosen version
                # is one that exists for this member.
                version = get_upload_version(context=ctx_ensemble, preferred_version=preferred_version)
                if version:
                    ctx_ensemble = ctx_ensemble.constrain(version=version)

                results = ctx_ensemble.search()
                # Logged per member; after the loop `results` would be undefined if no member was selected
                logger.info(f"Result len {len(results)}")
                if results:
                    results_list.append(results)

            logger.info(results_list)
            if results_list:
                for results in results_list:
                    download_model_variable(
                        project=project,
                        model_id=model,
                        search_results=results,
                        variable=variable,
                        base_path=base_path,
                    )
                return results_list
            logger.error(f"Could not find anything for {url}")
        except Exception as e:  # pylint: disable=W0718
            # Best effort: a failure on one node must not prevent trying the remaining nodes
            logger.error(f"Error: {e}")

    raise Exception(f"Could not find anything for all urls: {NODE_LINK_URLS}")
pathlib import sys -from typing import Union import yaml @@ -56,7 +55,7 @@ def get_mip(experiment: str): return "CMIP" -def get_yaml_config(yaml_config_file: Union[str, pathlib.Path], logger: logging.Logger = LOGGER) -> dict: +def get_yaml_config(yaml_config_file: str | pathlib.Path, logger: logging.Logger = LOGGER) -> dict: """ Reads a YAML configuration file and returns its contents as a dictionary. @@ -95,7 +94,7 @@ def get_yaml_config(yaml_config_file: Union[str, pathlib.Path], logger: logging. for path in potential_paths: if path.exists(): config_filepath = path - logger.info(f"Yaml config file [{str(path)}] found.") + logger.info(f"Yaml config file [{path!s}] found.") break params = {} diff --git a/configs/core_dataset.yaml b/configs/core_dataset.yaml index e6981f5..21ad438 100644 --- a/configs/core_dataset.yaml +++ b/configs/core_dataset.yaml @@ -1,4 +1,7 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["CO2", "BC", "SO2", "CH4", "tas", "pr"] - experiments: ["historical","ssp126", "ssp245", "ssp370", "ssp585"] \ No newline at end of file +CMIP6: + models: "NorESM2-LM" + variables: [ "tas", "pr" ] + experiments: [ "historical","ssp126", "ssp245", "ssp370", "ssp585" ] +input4MIPs: + variables: [ "CO2", "BC", "SO2", "CH4" ] + experiments: [ "historical","ssp126", "ssp245", "ssp370", "ssp585" ] diff --git a/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml b/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml index db0c390..7a323b7 100644 --- a/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml +++ b/configs/downloader/cmip6/awi_sea-ice-thickness_control-1950.yaml @@ -1,4 +1,4 @@ -models: ["AWI-CM-1-1-HR"] -downloader_kwargs: - variables: ["sithick"] - experiments: ["control-1950"] \ No newline at end of file +CMIP6: + models: [ "AWI-CM-1-1-HR" ] + variables: [ "sithick" ] + experiments: [ "control-1950" ] \ No newline at end of file diff --git a/configs/downloader/cmip6/canesm_co2_ssp.yaml
b/configs/downloader/cmip6/canesm_co2_ssp.yaml index fc373d0..ebe7823 100644 --- a/configs/downloader/cmip6/canesm_co2_ssp.yaml +++ b/configs/downloader/cmip6/canesm_co2_ssp.yaml @@ -1,4 +1,4 @@ -models: ["CanESM5"] -downloader_kwargs: - variables: ["co2"] - experiments: ["ssp245"] \ No newline at end of file +CMIP6: + models: ["CanESM5"] + variables: [ "co2" ] + experiments: [ "abrupt-2xCO2" ] \ No newline at end of file diff --git a/configs/downloader/cmip6/fgoals_tas_ssp.yaml b/configs/downloader/cmip6/fgoals_tas_ssp.yaml index cfd3eb1..dfec4c3 100644 --- a/configs/downloader/cmip6/fgoals_tas_ssp.yaml +++ b/configs/downloader/cmip6/fgoals_tas_ssp.yaml @@ -1,4 +1,4 @@ -models: ["FGOALS-g3"] -downloader_kwargs: - variables: ["tas"] - experiments: ["ssp245"] \ No newline at end of file +CMIP6: + models: ["FGOALS-g3"] + variables: [ "tas" ] + experiments: [ "ssp245" ] diff --git a/configs/downloader/cmip6/noresm_pr_historical.yaml b/configs/downloader/cmip6/noresm_pr_historical.yaml index 0498820..968a9e1 100644 --- a/configs/downloader/cmip6/noresm_pr_historical.yaml +++ b/configs/downloader/cmip6/noresm_pr_historical.yaml @@ -1,4 +1,5 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["pr"] - experiments: ["historical"] \ No newline at end of file +CMIP6: + models: ["NorESM2-LM"] + variables: [ "pr" ] + experiments: [ "historical" ] + max_ensemble_members: 1 \ No newline at end of file diff --git a/configs/downloader/cmip6/noresm_tas_ssp.yaml b/configs/downloader/cmip6/noresm_tas_ssp.yaml index e1e7238..c8f605c 100644 --- a/configs/downloader/cmip6/noresm_tas_ssp.yaml +++ b/configs/downloader/cmip6/noresm_tas_ssp.yaml @@ -1,4 +1,5 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["tas"] - experiments: ["ssp245"] \ No newline at end of file +CMIP6: + models: ["NorESM2-LM"] + variables: [ "tas" ] + experiments: [ "ssp245" ] + ensemble_members: [ "r9i1p1f2", "r8i1p1f2" ] \ No newline at end of file diff --git 
a/configs/downloader/cmip6/ukesm_tas_picontrol.yaml b/configs/downloader/cmip6/ukesm_tas_picontrol.yaml index 2b2a25d..60339d8 100644 --- a/configs/downloader/cmip6/ukesm_tas_picontrol.yaml +++ b/configs/downloader/cmip6/ukesm_tas_picontrol.yaml @@ -1,4 +1,4 @@ -models: ["UKESM1-0-LL"] -downloader_kwargs: - variables: ["tas"] - experiments: ["piControl"] +CMIP6: + models: ["UKESM1-0-LL"] + variables: [ "tas" ] + experiments: [ "piControl" ] diff --git a/configs/downloader/constants/cmip6.yaml b/configs/downloader/constants/cmip6.yaml new file mode 100644 index 0000000..374756c --- /dev/null +++ b/configs/downloader/constants/cmip6.yaml @@ -0,0 +1,916 @@ +node_link: "https://esgf-node.llnl.gov/esg-search/" + +model_sources: + - "4AOP-v1-5" + - "ACCESS-CM2" + - "ACCESS-ESM1-5" + - "ACCESS-OM2" + - "ACCESS-OM2-025" + - "ARTS-2-3" + - "AWI-CM-1-1-HR" + - "AWI-CM-1-1-LR" + - "AWI-CM-1-1-MR" + - "AWI-ESM-1-1-LR" + - "AWI-ESM-2-1-LR" + - "BCC-CSM2-HR" + - "BCC-CSM2-MR" + - "BCC-ESM1" + - "CAM-MPAS-HR" + - "CAM-MPAS-LR" + - "CAMS-CSM1-0" + - "CanESM5" + - "CanESM5-1" + - "CanESM5-CanOE" + - "CAS-ESM2-0" + - "CESM1-1-CAM5-CMIP5" + - "CESM1-CAM5-SE-HR" + - "CESM1-CAM5-SE-LR" + - "CESM1-WACCM-SC" + - "CESM2" + - "CESM2-FV2" + - "CESM2-WACCM" + - "CESM2-WACCM-FV2" + - "CIESM" + - "CMCC-CM2-HR4" + - "CMCC-CM2-SR5" + - "CMCC-CM2-VHR4" + - "CMCC-ESM2" + - "CNRM-CM6-1" + - "CNRM-CM6-1-HR" + - "CNRM-ESM2-1" + - "E3SM-1-0" + - "E3SM-1-1" + - "E3SM-1-1-ECA" + - "E3SM-2-0" + - "EC-Earth3" + - "EC-Earth3-AerChem" + - "EC-Earth3-CC" + - "EC-Earth3-GrIS" + - "EC-Earth3-HR" + - "EC-Earth3-LR" + - "EC-Earth3-Veg" + - "EC-Earth3-Veg-LR" + - "EC-Earth3P" + - "EC-Earth3P-HR" + - "EC-Earth3P-VHR" + - "ECMWF-IFS-HR" + - "ECMWF-IFS-LR" + - "ECMWF-IFS-MR" + - "FGOALS-f3-H" + - "FGOALS-f3-L" + - "FGOALS-g3" + - "FIO-ESM-2-0" + - "GFDL-AM4" + - "GFDL-CM4" + - "GFDL-CM4C192" + - "GFDL-ESM2M" + - "GFDL-ESM4" + - "GFDL-GRTCODE" + - "GFDL-OM4p5B" + - "GFDL-RFM-DISORT" + - "GISS-E2-1-G" + - 
"GISS-E2-1-G-CC" + - "GISS-E2-1-H" + - "GISS-E2-2-G" + - "GISS-E2-2-H" + - "GISS-E3-G" + - "HadGEM3-GC31-HH" + - "HadGEM3-GC31-HM" + - "HadGEM3-GC31-LL" + - "HadGEM3-GC31-LM" + - "HadGEM3-GC31-MH" + - "HadGEM3-GC31-MM" + - "HiRAM-SIT-HR" + - "HiRAM-SIT-LR" + - "ICON-ESM-LR" + - "IITM-ESM" + - "INM-CM4-8" + - "INM-CM5-0" + - "INM-CM5-H" + - "IPSL-CM5A2-INCA" + - "IPSL-CM6A-ATM-HR" + - "IPSL-CM6A-ATM-ICO-HR" + - "IPSL-CM6A-ATM-ICO-LR" + - "IPSL-CM6A-ATM-ICO-MR" + - "IPSL-CM6A-ATM-ICO-VHR" + - "IPSL-CM6A-ATM-LR-REPROBUS" + - "IPSL-CM6A-LR" + - "IPSL-CM6A-LR-INCA" + - "IPSL-CM6A-MR1" + - "KACE-1-0-G" + - "KIOST-ESM" + - "LBLRTM-12-8" + - "MCM-UA-1-0" + - "MIROC-ES2H" + - "MIROC-ES2H-NB" + - "MIROC-ES2L" + - "MIROC6" + - "MPI-ESM-1-2-HAM" + - "MPI-ESM1-2-HR" + - "MPI-ESM1-2-LR" + - "MPI-ESM1-2-XR" + - "MRI-AGCM3-2-H" + - "MRI-AGCM3-2-S" + - "MRI-ESM2-0" + - "NESM3" + - "NICAM16-7S" + - "NICAM16-8S" + - "NICAM16-9S" + - "NorCPM1" + - "NorESM1-F" + - "NorESM2-LM" + - "NorESM2-MH" + - "RRTMG-LW-4-91" + - "RRTMG-SW-4-02" + - "RTE-RRTMGP-181204" + - "SAM0-UNICON" + - "TaiESM1" + - "TaiESM1-TIMCOM" + - "TaiESM1-TIMCOM2" + - "UKESM1-0-LL" + - "UKESM1-1-LL" + - "UKESM1-ice-LL" + - "E3SM-2-0-NARRM" + - "E3SM-2-1" + - "EC-Earth3-ESM-1" + - "PCMDI-test-1-0" + +var_source_lookup: + - "ztp" + - "zsatcalc" + - "zsatarag" + - "zostoga" + - "zossq" + - "zos" + - "zoocos" + - "zooc" + - "zo2min" + - "zhalfo" + - "zg500" + - "zg1000" + - "zg100" + - "zg10" + - "zg" + - "zfullo" + - "wtd" + - "wo" + - "wmo" + - "wfonocorr" + - "wfo" + - "wetss" + - "wetso4" + - "wetso2" + - "wetlandFrac" + - "wetlandCH4" + - "wetbc" + - "wap500" + - "wap" + - "vsf" + - "volo" + - "volcello" + - "vo" + - "vmo" + - "vegHeight" + - "va" + - "uo" + - "umo" + - "ua" + - "tslsi" + - "tsl" + - "ts" + - "tran" + - "tossq" + - "tosga" + - "tos" + - "tob" + - "thkcello" + - "thetaot700" + - "thetaot300" + - "thetaot2000" + - "thetaot" + - "thetaoga" + - "thetao" + - "tgs" + - "tcs" + - "tauvo" + - "tauv" + - 
"tauuo" + - "tauu" + - "tasmin" + - "tasmax" + - "tas" + - "talkos" + - "talknat" + - "talk" + - "ta850" + - "ta700" + - "ta500" + - "ta" + - "t20d" + - "spco2" + - "sossq" + - "sosga" + - "sos" + - "sootsn" + - "somint" + - "soga" + - "sob" + - "so2" + - "so" + - "snw" + - "sndmasswindrif" + - "sndmasssnf" + - "sndmasssi" + - "sndmassmelt" + - "snd" + - "snc" + - "sivols" + - "sivoln" + - "sivol" + - "siv" + - "siu" + - "sitimefrac" + - "sithick" + - "sitemptop" + - "sitempsnic" + - "sitempbot" + - "sistryubot" + - "sistrydtop" + - "sistrxubot" + - "sistrxdtop" + - "sispeed" + - "sisnthick" + - "sisnmass" + - "sisnhc" + - "sisnconc" + - "sirdgthick" + - "sirdgconc" + - "sipr" + - "sios" + - "simpconc" + - "simass" + - "siitdthick" + - "siitdsnthick" + - "siitdsnconc" + - "siitdconc" + - "sihc" + - "siforcetilty" + - "siforcetiltx" + - "siforceintstry" + - "siforceintstrx" + - "siforcecorioly" + - "siforcecoriolx" + - "siflswutop" + - "siflswdtop" + - "siflswdbot" + - "siflsensupbot" + - "siflsenstop" + - "sifllwutop" + - "sifllwdtop" + - "sifllatstop" + - "siflfwdrain" + - "siflfwbot" + - "siflcondtop" + - "siflcondbot" + - "sifb" + - "siextents" + - "siextentn" + - "sidmasstrany" + - "sidmasstranx" + - "sidmassth" + - "sidmasssi" + - "sidmassmelttop" + - "sidmassmeltbot" + - "sidmasslat" + - "sidmassgrowthwat" + - "sidmassgrowthbot" + - "sidmassevapsubl" + - "sidmassdyn" + - "sidivvel" + - "sidconcth" + - "sidconcdyn" + - "siconc" + - "sicompstren" + - "siarean" + - "siage" + - "si" + - "sftof" + - "sftlf" + - "sftgif" + - "sfdsi" + - "sfcWind" + - "sf6" + - "rtmt" + - "rsutcsaf" + - "rsutcs" + - "rsutaf" + - "rsut" + - "rsuscs" + - "rsus" + - "rsntds" + - "rsdt" + - "rsdsdiff" + - "rsdscs" + - "rsds" + - "rlutcsaf" + - "rlutcs" + - "rlutaf" + - "rlut" + - "rlus" + - "rldscs" + - "rlds" + - "rh" + - "reffclwtop" + - "ra" + - "rMaint" + - "rGrowth" + - "qgwr" + - "pso" + - "psl" + - "ps" + - "prw" + - "prveg" + - "prsn" + - "prra" + - "prc" + - "pr" + - "ppos" + - 
"pp" + - "popos" + - "pop" + - "ponos" + - "pon" + - "po4os" + - "po4" + - "phynos" + - "phyn" + - "phyfeos" + - "phyfe" + - "phyc" + - "phos" + - "phnat" + - "phalf" + - "ph" + - "pfull" + - "pctisccp" + - "pbo" + - "orog" + - "opottempmint" + - "oh" + - "od870aer" + - "od550ss" + - "od550so4" + - "od550oa" + - "od550lt1aer" + - "od550dust" + - "od550csaer" + - "od550bc" + - "od550aerh2o" + - "od550aer" + - "od440aer" + - "obvfsq" + - "o3" + - "o2satos" + - "o2sat" + - "o2os" + - "o2min" + - "o2" + - "nppWood" + - "nppRoot" + - "nppLeaf" + - "npp" + - "no3os" + - "no3" + - "nep" + - "nbp" + - "nVeg" + - "nStem" + - "nSoil" + - "nRoot" + - "nMineralNO3" + - "nMineralNH4" + - "nMineral" + - "nLitter" + - "nLeaf" + - "nLand" + - "n2oglobal" + - "msftmzmpa" + - "msftmz" + - "msftmrhompa" + - "msftmrho" + - "msftbarot" + - "mrtws" + - "mrsos" + - "mrsol" + - "mrso" + - "mrsll" + - "mrsfl" + - "mrros" + - "mrrob" + - "mrro" + - "mrlso" + - "mrfso" + - "mmrss" + - "mmrsoa" + - "mmrso4" + - "mmrpm2p5" + - "mmrpm1" + - "mmroa" + - "mmrdust" + - "mmrbc" + - "mmraerh2o" + - "mlotstsq" + - "mlotstmin" + - "mlotstmax" + - "mlotst" + - "mfo" + - "masso" + - "masscello" + - "lwsnl" + - "lwp" + - "loadss" + - "loaddust" + - "lai" + - "isop" + - "intpp" + - "intpoc" + - "intpn2" + - "intdoc" + - "intdic" + - "huss" + - "hus" + - "hurs" + - "hur" + - "hfy" + - "hfx" + - "hfss" + - "hfls" + - "hfds" + - "hfbasinpmdiff" + - "hfbasinpmadv" + - "hfbasinpadv" + - "hfbasin" + - "gpp" + - "fsitherm" + - "froc" + - "frn" + - "friver" + - "fric" + - "frfe" + - "ficeberg" + - "fgo2" + - "fgdms" + - "fgco2nat" + - "fgco2" + - "fVegLitterSenescence" + - "fVegLitterMortality" + - "fVegLitter" + - "fNup" + - "fNnetmin" + - "fNloss" + - "fNleach" + - "fNgasNonFire" + - "fNgasFire" + - "fNgas" + - "fNfert" + - "fNdep" + - "fNProduct" + - "fNOx" + - "fN2O" + - "fLuc" + - "fLitterFire" + - "fHarvestToProduct" + - "fHarvest" + - "fFireNat" + - "fFire" + - "fDeforestToProduct" + - "fBNF" + - 
"evspsblveg" + - "evspsblsoi" + - "evspsbl" + - "evs" + - "esn" + - "es" + - "epsi100" + - "epp100" + - "epn100" + - "epfe100" + - "epcalc100" + - "epc100" + - "emivoc" + - "emiss" + - "emiso4" + - "emiso2" + - "emioa" + - "emiisop" + - "emidust" + - "emidms" + - "emibvoc" + - "emibc" + - "ec" + - "dryso4" + - "dryso2" + - "drybc" + - "dpco2" + - "dmsos" + - "dms" + - "dmlt" + - "dissocos" + - "dissoc" + - "dissicos" + - "dissicnat" + - "dissic" + - "dfeos" + - "dfe" + - "detocos" + - "detoc" + - "deptho" + - "cod" + - "co3satcalcos" + - "co3satcalc" + - "co3sataragos" + - "co3satarag" + - "co3os" + - "co3nat" + - "co3" + - "co2mass" + - "co2" + - "clwvi" + - "clwmodis" + - "clw" + - "cltmodis" + - "cltisccp" + - "cltcalipso" + - "clt" + - "clmcalipso" + - "cllcalipso" + - "clivi" + - "climodis" + - "cli" + - "clhcalipso" + - "cl" + - "chlos" + - "chl" + - "chepsoa" + - "ch4global" + - "cfc12global" + - "cfc12" + - "cfc11global" + - "cfc11" + - "cdnc" + - "cct" + - "ccn" + - "ccb" + - "calcos" + - "calc" + - "cWood" + - "cVeg" + - "cStem" + - "cSoilSlow" + - "cSoilMedium" + - "cSoilFast" + - "cSoilAbove1m" + - "cSoil" + - "cRoot" + - "cMisc" + - "cLitter" + - "cLeaf" + - "cLand" + - "cCwd" + - "bsios" + - "bsi" + - "bldep" + - "bfeos" + - "bfe" + - "basin" + - "ares" + - "areacello" + - "areacella" + - "albisccp" + - "airmass" + - "agessc" + - "abs550aer" + +supported_experiments: + - "hist-1950HC" + - "lfmip-pdLC" + - "ssp126" + - "ssp126-ssp370Lu" + - "ssp245" + - "ssp370" + - "ssp370-lowNTCF" + - "ssp370-ssp126Lu" + - "ssp370SST" + - "ssp370SST-lowCH4" + - "ssp370SST-lowNTCF" + - "ssp370SST-ssp126Lu" + - "ssp585" + - "hist-resAMO" + - "hist-resIPO" + - "historical-ext" + - "lfmip-initLC" + - "lfmip-pdLC-cruNcep" + - "lfmip-pdLC-princeton" + - "lfmip-pdLC-wfdei" + - "lfmip-rmLC" + - "lfmip-rmLC-cruNcep" + - "lfmip-rmLC-princeton" + - "lfmip-rmLC-wfdei" + - "pa-futAntSIC" + - "pa-futArcSIC" + - "pa-pdSIC" + - "pa-piAntSIC" + - "pa-piArcSIC" + - "ssp119" + - 
"ssp370pdSST" + - "ssp370SST-lowAer" + - "ssp370SST-lowBC" + - "ssp370SST-lowO3" + - "ssp434" + - "ssp460" + - "dcppC-atl-pacemaker" + - "dcppC-pac-pacemaker" + - "pa-futAntSIC-ext" + - "pa-futArcSIC-ext" + - "pa-pdSIC-ext" + - "ssp370-lowNTCFCH4" + - "ssp370SST-lowNTCFCH4" + - "volc-cluster-21C" + - "yr2010CO2" + - "dcppA-historical-niff" + - "1pctCO2" + - "1pctCO2-bgc" + - "abrupt-4xCO2" + - "dcppC-amv-neg" + - "dcppC-amv-pos" + - "dcppC-atl-control" + - "dcppC-ipv-neg" + - "dcppC-ipv-pos" + - "dcppC-pac-control" + - "deforest-globe" + - "faf-heat" + - "faf-heat-NA50pct" + - "faf-stress" + - "faf-water" + - "G1" + - "hist-aer" + - "hist-GHG" + - "hist-nat" + - "hist-noLu" + - "hist-piNTCF" + - "hist-spAer-all" + - "histSST" + - "histSST-noLu" + - "histSST-piCH4" + - "histSST-piNTCF" + - "piClim-4xCO2" + - "piClim-aer" + - "piClim-anthro" + - "piClim-CH4" + - "piClim-control" + - "piClim-ghg" + - "piClim-HC" + - "piClim-lu" + - "piClim-NTCF" + - "volc-long-eq" + - "volc-pinatubo-full" + - "volc-pinatubo-strat" + - "volc-pinatubo-surf" + - "1pctCO2-rad" + - "1pctCO2Ndep" + - "1pctCO2Ndep-bgc" + - "abrupt-0p5xCO2" + - "abrupt-2xCO2" + - "abrupt-solm4p" + - "abrupt-solp4p" + - "dcppC-amv-ExTrop-neg" + - "dcppC-amv-ExTrop-pos" + - "dcppC-amv-Trop-neg" + - "dcppC-amv-Trop-pos" + - "dcppC-ipv-NexTrop-neg" + - "dcppC-ipv-NexTrop-pos" + - "faf-all" + - "faf-antwater-stress" + - "faf-heat-NA0pct" + - "faf-passiveheat" + - "hist-bgc" + - "hist-piAer" + - "hist-spAer-aer" + - "hist-stratO3" + - "histSST-piAer" + - "histSST-piN2O" + - "histSST-piO3" + - "piClim-2xdust" + - "piClim-2xss" + - "piClim-BC" + - "piClim-histaer" + - "piClim-histall" + - "piClim-histghg" + - "piClim-histnat" + - "piClim-N2O" + - "piClim-O3" + - "piClim-spAer-aer" + - "piClim-spAer-anthro" + - "piClim-spAer-histaer" + - "piClim-spAer-histall" + - "piSST-4xCO2-solar" + - "volc-cluster-ctrl" + - "volc-long-hlN" + - "hist-all-aer2" + - "hist-all-nat2" + - "hist-CO2" + - "hist-sol" + - "hist-totalO3" + - 
"hist-volc" + - "piClim-2xDMS" + - "piClim-2xfire" + - "piClim-2xNOx" + - "piClim-2xVOC" + - "piClim-NH3" + - "piClim-NOx" + - "piClim-OC" + - "piClim-SO2" + - "piClim-VOC" + - "volc-long-hlS" + - "histSST-1950HC" + - "esm-ssp585" + - "esm-ssp585-ssp126Lu" + - "esm-hist-ext" + - "ssp534-over-bgc" + - "ssp585-bgc" + - "esm-1pct-brch-1000PgC" + - "esm-1pct-brch-750PgC" + - "esm-1pct-brch-2000PgC" + - "esm-hist" + - "esm-pi-cdr-pulse" + - "esm-pi-CO2pulse" + - "esm-1pctCO2" + - "esm-bell-750PgC" + - "esm-bell-1000PgC" + - "esm-bell-2000PgC" + - "esm-yr2010CO2-control" + - "1pctCO2-4xext" + - "1pctCO2-cdr" + - "esm-ssp534-over" + - "esm-ssp585-ocn-alk" + - "esm-ssp585ext" + - "esm-ssp585-ocn-alk-stop" + - "esm-ssp585-ssp126Lu-ext" + - "esm-yr2010CO2-cdr-pulse" + - "esm-yr2010CO2-CO2pulse" + - "esm-yr2010CO2-noemit" + - "amip" + - "amip-4xCO2" + - "amip-future4K" + - "amip-hist" + - "amip-p4K" + - "aqua-4xCO2" + - "aqua-control" + - "aqua-p4K" + - "highresSST-present" + - "ism-ctrl-std" + - "ism-pdControl-std" + - "ism-piControl-self" + - "land-hist" + - "land-hist-altStartYear" + - "land-noLu" + - "land-ssp126" + - "land-ssp585" + - "lgm" + - "lig127k" + - "midHolocene" + - "midPliocene-eoi400" + - "omip1" + - "past1000" + - "piControl-withism" + - "rad-irf" + - "a4SST" + - "a4SSTice" + - "a4SSTice-4xCO2" + - "amip-a4SST-4xCO2" + - "amip-lfmip-pdLC" + - "amip-lfmip-pObs" + - "amip-lfmip-rmLC" + - "amip-lwoff" + - "amip-m4K" + - "amip-p4K-lwoff" + - "amip-piForcing" + - "aqua-control-lwoff" + - "aqua-p4K-lwoff" + - "dcppA-assim" + - "esm-piControl-spinup" + - "land-cClim" + - "land-cCO2" + - "land-crop-grass" + - "land-crop-noFert" + - "land-crop-noIrrig" + - "land-crop-noIrrigFert" + - "land-hist-altLu1" + - "land-hist-altLu2" + - "land-hist-cruNcep" + - "land-hist-princeton" + - "land-hist-wfdei" + - "land-noFire" + - "land-noPasture" + - "land-noShiftCultivate" + - "land-noWoodHarv" + - "land-ssp434" + - "omip1-spunup" + - "past1000-solaronly" + - "past1000-volconly" 
+ - "piControl-spinup" + - "piControl-spinup-cmip5" + - "piSST" + - "piSST-4xCO2" + - "piSST-4xCO2-rad" + - "piSST-pxK" + - "spinup-1950" + - "amip-hld" + - "amip-TIP" + - "amip-TIP-nosh" + - "control-slab" + - "dcppC-atl-spg" + - "esm-past1000" + - "ism-lig127k-std" + - "omip2" + - "omip2-spunup" + - "past2k" + - "esm-piControl" + - "historical" + - "historical-cmip5" + - "hist-aer-cmip5" + - "hist-GHG-cmip5" + - "hist-nat-cmip5" + - "piControl" + - "piControl-cmip5" + - "ssp245-aer" + - "ssp245-cov-strgreen" + - "ssp245-covid" + - "ssp245-cov-aer" + - "ssp245-cov-fossil" + - "ssp245-cov-GHG" + - "ssp245-cov-modgreen" + - "ssp245-GHG" + - "ssp245-nat" + - "ssp245-stratO3" + - "dcppA-hindcast" + - "dcppB-forecast" + - "dcppC-forecast-addPinatubo" + - "dcppC-hindcast-noPinatubo" + - "dcppC-hindcast-noAgung" + - "dcppC-hindcast-noElChichon" + - "dcppC-forecast-addAgung" + - "dcppC-forecast-addElChichon" + - "dcppA-hindcast-niff" + - "futureSST-4xCO2-solar" + - "G6solar" + - "G6sulfur" + - "G6SST1" + - "G7cirrus" + - "G7SST1-cirrus" + - "ssp534-over" + - "G6SST2-solar" + - "G6SST2-sulfur" + - "G7SST2-cirrus" + - "control-1950" + - "hist-1950" + - "highres-future" + - "highresSST-4xCO2" + - "highresSST-future" + - "highresSST-LAI" + - "highresSST-p4K" + - "highresSST-smoothed" + - "1pctCO2to4x-withism" + - "historical-withism" + - "ism-1pctCO2to4x-self" + - "ism-historical-self" + - "ism-1pctCO2to4x-std" + - "ism-historical-std" + - "ism-asmb-std" + - "ism-bsmb-std" + - "ism-amip-std" + - "ism-ssp585-self" + - "ism-ssp585-std" + - "ssp585-withism" + - "pdSST-futAntSIC" + - "pdSST-futArcSIC" + - "pdSST-pdSIC" + - "pdSST-piAntSIC" + - "pdSST-piArcSIC" + - "piSST-pdSIC" + - "futSST-pdSIC" + - "piSST-piSIC" + - "amip-climSIC" + - "amip-climSST" + - "modelSST-futArcSIC" + - "modelSST-pdSIC" + - "pdSST-futArcSICSIT" + - "pdSST-futBKSeasSIC" + - "pdSST-futOkhotskSIC" + - "pdSST-pdSICSIT" + - "rcp26-cmip5" + - "rcp45-cmip5" + - "rcp60-cmip5" + - "rcp85-cmip5" + - 
"volc-cluster-mill" + - "volc-pinatubo-slab" \ No newline at end of file diff --git a/configs/downloader/constants/cmip6plus.yaml b/configs/downloader/constants/cmip6plus.yaml new file mode 100644 index 0000000..e7e255d --- /dev/null +++ b/configs/downloader/constants/cmip6plus.yaml @@ -0,0 +1,13 @@ +node_link: "http://esgf-data2.llnl.gov" + +model_sources: + - "HasGEM3-GC31-LL" + +var_source_lookup: + - "areacella" + - "mrsofc" + +supported_experiments: + - "hist-lu" + - "hist-piAer" + - "hist-piVolc" \ No newline at end of file diff --git a/configs/downloader/constants/imput4MIPs.yaml b/configs/downloader/constants/imput4MIPs.yaml new file mode 100644 index 0000000..4915f04 --- /dev/null +++ b/configs/downloader/constants/imput4MIPs.yaml @@ -0,0 +1,732 @@ +node_link: "https://esgf-node.llnl.gov/esg-search/" + +emissions_endings: + - "_em_openburning" + - "_em_anthro" + - "_em_AIR_anthro" + +meta_endings_prc: + - "_percentage_AGRI" + - "_percentage_BORF" + - "_percentage_DEFO" + - "_percentage_PEAT" + - "_percentage_SAVA" + - "_percentage_TEMF" + +meta_endings_shar: + - "_openburning_share" + +mip_era: "CMIP6" + +target_mip: "ScenarioMIP" + +supported_experiments: + - "historical" + - "ssp119" + - "ssp126" + - "ssp245" + - "ssp370" + - "ssp434" + - "ssp460" + - "ssp534-over" + - "ssp585" + +var_source_lookup: + - "years" + - "year_weight" + - "year_fr" + - "wlenbinsize" + - "wlen_bnds" + - "wlen" + - "wfo" + - "wetnoy" + - "wetnhx" + - "water_vapor" + - "vos" + - "volume_density" + - "vo" + - "vmro3" + - "vas" + - "urban_to_secdn" + - "urban_to_secdf" + - "urban_to_range" + - "urban_to_pastr" + - "urban_to_c4per" + - "urban_to_c4ann" + - "urban_to_c3per" + - "urban_to_c3nfx" + - "urban_to_c3ann" + - "urban" + - "uos" + - "uo" + - "uas" + - "tsi" + - "ts" + - "total_solar_irradiance" + - "tosbcs" + - "tos" + - "thetao" + - "theta" + - "temp_level" + - "temp_layer" + - "tauv" + - "tauu" + - "tas" + - "surface_temperature" + - "surface_emissivity" + - 
"surface_albedo" + - "sst" + - "ssn" + - "ssi" + - "ssa550" + - "sos" + - "solar_zenith_angle" + - "so2f2_SH" + - "so2f2_NH" + - "so2f2_GM" + - "so" + - "sithick" + - "sig_lon_W" + - "sig_lon_E" + - "sig_lat_W" + - "sig_lat_E" + - "siconcbcs" + - "siconca" + - "siconc" + - "sftof" + - "sftflf" + - "sf6_SH" + - "sf6_NH" + - "sf6_GM" + - "secyf_harv" + - "secyf_bioh" + - "secnf_harv" + - "secnf_bioh" + - "secmf_harv" + - "secmf_bioh" + - "secmb" + - "secma" + - "secdn_to_urban" + - "secdn_to_secdf" + - "secdn_to_range" + - "secdn_to_pastr" + - "secdn_to_c4per" + - "secdn_to_c4ann" + - "secdn_to_c3per" + - "secdn_to_c3nfx" + - "secdn_to_c3ann" + - "secdn" + - "secdf_to_urban" + - "secdf_to_secdn" + - "secdf_to_range" + - "secdf_to_pastr" + - "secdf_to_c4per" + - "secdf_to_c4ann" + - "secdf_to_c3per" + - "secdf_to_c3nfx" + - "secdf_to_c3ann" + - "secdf" + - "scph" + - "scnum" + - "sad_of_big_particles" + - "sad" + - "rsds" + - "rndwd" + - "rmean" + - "rlds" + - "range_to_urban" + - "range_to_secdn" + - "range_to_secdf" + - "range_to_pastr" + - "range_to_c4per" + - "range_to_c4ann" + - "range_to_c3per" + - "range_to_c3nfx" + - "range_to_c3ann" + - "range" + - "ptbio" + - "psl" + - "prsn" + - "prra" + - "profile_weight" + - "primn_to_urban" + - "primn_to_secdf" + - "primn_to_range" + - "primn_to_pastr" + - "primn_to_c4per" + - "primn_to_c4ann" + - "primn_to_c3per" + - "primn_to_c3nfx" + - "primn_to_c3ann" + - "primn_harv" + - "primn_bioh" + - "primn" + - "primf_to_urban" + - "primf_to_secdn" + - "primf_to_range" + - "primf_to_pastr" + - "primf_to_c4per" + - "primf_to_c4ann" + - "primf_to_c3per" + - "primf_to_c3nfx" + - "primf_to_c3ann" + - "primf_harv" + - "primf_bioh" + - "primf" + - "pressure" + - "pres_level" + - "pres_layer" + - "pr" + - "plume_number" + - "plume_lon" + - "plume_lat" + - "plume_feature" + - "percentage_TEMF" + - "percentage_SAVA" + - "percentage_PEAT" + - "percentage_DEFO" + - "percentage_BORF" + - "percentage_AGRI" + - "pastr_to_urban" + - 
"pastr_to_secdn" + - "pastr_to_secdf" + - "pastr_to_range" + - "pastr_to_c4per" + - "pastr_to_c4ann" + - "pastr_to_c3per" + - "pastr_to_c3nfx" + - "pastr_to_c3ann" + - "pastr" + - "ozone" + - "oxygen_GM" + - "nitrous_oxide_SH" + - "nitrous_oxide_NH" + - "nitrous_oxide_GM" + - "nitrogen_GM" + - "nf3_SH" + - "nf3_NH" + - "nf3_GM" + - "mrro" + - "month" + - "mole_fraction_of_so2f2_in_air" + - "mole_fraction_of_sf6_in_air" + - "mole_fraction_of_nitrous_oxide_in_air" + - "mole_fraction_of_nf3_in_air" + - "mole_fraction_of_methyl_chloride_in_air" + - "mole_fraction_of_methyl_bromide_in_air" + - "mole_fraction_of_methane_in_air" + - "mole_fraction_of_hfc4310mee_in_air" + - "mole_fraction_of_hfc365mfc_in_air" + - "mole_fraction_of_hfc32_in_air" + - "mole_fraction_of_hfc245fa_in_air" + - "mole_fraction_of_hfc23_in_air" + - "mole_fraction_of_hfc236fa_in_air" + - "mole_fraction_of_hfc227ea_in_air" + - "mole_fraction_of_hfc152a_in_air" + - "mole_fraction_of_hfc143a_in_air" + - "mole_fraction_of_hfc134aeq_in_air" + - "mole_fraction_of_hfc134a_in_air" + - "mole_fraction_of_hfc125_in_air" + - "mole_fraction_of_hcfc22_in_air" + - "mole_fraction_of_hcfc142b_in_air" + - "mole_fraction_of_hcfc141b_in_air" + - "mole_fraction_of_halon2402_in_air" + - "mole_fraction_of_halon1301_in_air" + - "mole_fraction_of_halon1211_in_air" + - "mole_fraction_of_co2eq_in_air" + - "mole_fraction_of_chcl3_in_air" + - "mole_fraction_of_ch3ccl3_in_air" + - "mole_fraction_of_ch2cl2_in_air" + - "mole_fraction_of_cfc12eq_in_air" + - "mole_fraction_of_cfc12_in_air" + - "mole_fraction_of_cfc11eq_in_air" + - "mole_fraction_of_cfc11_in_air" + - "mole_fraction_of_cfc115_in_air" + - "mole_fraction_of_cfc114_in_air" + - "mole_fraction_of_cfc113_in_air" + - "mole_fraction_of_cf4_in_air" + - "mole_fraction_of_carbon_tetrachloride_in_air" + - "mole_fraction_of_carbon_dioxide_in_air" + - "mole_fraction_of_c_c4f8_in_air" + - "mole_fraction_of_c8f18_in_air" + - "mole_fraction_of_c7f16_in_air" + - 
"mole_fraction_of_c6f14_in_air" + - "mole_fraction_of_c5f12_in_air" + - "mole_fraction_of_c4f10_in_air" + - "mole_fraction_of_c3f8_in_air" + - "mole_fraction_of_c2f6_in_air" + - "methyl_chloride_SH" + - "methyl_chloride_NH" + - "methyl_chloride_GM" + - "methyl_bromide_SH" + - "methyl_bromide_NH" + - "methyl_bromide_GM" + - "methane_SH" + - "methane_NH" + - "methane_GM" + - "mask4resto_ipv_Nextrop" + - "mask4resto_ipv" + - "mask4resto_amv_trop" + - "mask4resto_amv_extrop" + - "mask4resto_amv" + - "lon_bounds" + - "licalvf" + - "lat_bounds" + - "kp" + - "is_biomass" + - "irrig_c4per" + - "irrig_c4ann" + - "irrig_c3per" + - "irrig_c3nfx" + - "irrig_c3ann" + - "ipv_index" + - "iprp" + - "iprm" + - "iprg" + - "icwtr" + - "huss" + - "hfds" + - "hfc4310mee_SH" + - "hfc4310mee_NH" + - "hfc4310mee_GM" + - "hfc365mfc_SH" + - "hfc365mfc_NH" + - "hfc365mfc_GM" + - "hfc32_SH" + - "hfc32_NH" + - "hfc32_GM" + - "hfc245fa_SH" + - "hfc245fa_NH" + - "hfc245fa_GM" + - "hfc23_SH" + - "hfc23_NH" + - "hfc23_GM" + - "hfc236fa_SH" + - "hfc236fa_NH" + - "hfc236fa_GM" + - "hfc227ea_SH" + - "hfc227ea_NH" + - "hfc227ea_GM" + - "hfc152a_SH" + - "hfc152a_NH" + - "hfc152a_GM" + - "hfc143a_SH" + - "hfc143a_NH" + - "hfc143a_GM" + - "hfc134aeq_SH" + - "hfc134aeq_NH" + - "hfc134aeq_GM" + - "hfc134a_SH" + - "hfc134a_NH" + - "hfc134a_GM" + - "hfc125_SH" + - "hfc125_NH" + - "hfc125_GM" + - "hcfc22_SH" + - "hcfc22_NH" + - "hcfc22_GM" + - "hcfc142b_SH" + - "hcfc142b_NH" + - "hcfc142b_GM" + - "hcfc141b_SH" + - "hcfc141b_NH" + - "hcfc141b_GM" + - "halon2402_SH" + - "halon2402_NH" + - "halon2402_GM" + - "halon1301_SH" + - "halon1301_NH" + - "halon1301_GM" + - "halon1211_SH" + - "halon1211_NH" + - "halon1211_GM" + - "gzdis" + - "gridcellarea" + - "gpbio" + - "gldis" + - "glat_bnds" + - "glat" + - "fulwd" + - "ftr_weight" + - "fstnf" + - "friver" + - "flood" + - "fill_flag" + - "fharv_c4per" + - "fharv_c3per" + - "fertl_c4per" + - "fertl_c4ann" + - "fertl_c3per" + - "fertl_c3nfx" + - "fertl_c3ann" + - "f107" 
+ - "expt_label" + - "evspsbl" + - "drynoy" + - "drynhx" + - "delta13co2_in_air" + - "datasource" + - "crpbf_total" + - "crpbf_c4per" + - "crpbf_c4ann" + - "crpbf_c3per" + - "crpbf_c3nfx" + - "crpbf_c3ann" + - "combf" + - "co2eq_SH" + - "co2eq_NH" + - "co2eq_GM" + - "chcl3_SH" + - "chcl3_NH" + - "chcl3_GM" + - "ch3ccl3_SH" + - "ch3ccl3_NH" + - "ch3ccl3_GM" + - "ch2cl2_SH" + - "ch2cl2_NH" + - "ch2cl2_GM" + - "cfc12eq_SH" + - "cfc12eq_NH" + - "cfc12eq_GM" + - "cfc12_SH" + - "cfc12_NH" + - "cfc12_GM" + - "cfc11eq_SH" + - "cfc11eq_NH" + - "cfc11eq_GM" + - "cfc11_SH" + - "cfc11_NH" + - "cfc11_GM" + - "cfc115_SH" + - "cfc115_NH" + - "cfc115_GM" + - "cfc114_SH" + - "cfc114_NH" + - "cfc114_GM" + - "cfc113_SH" + - "cfc113_NH" + - "cfc113_GM" + - "cf4_SH" + - "cf4_NH" + - "cf4_GM" + - "ccode" + - "carea" + - "carbon_tetrachloride_SH" + - "carbon_tetrachloride_NH" + - "carbon_tetrachloride_GM" + - "carbon_monoxide_GM" + - "carbon_dioxide_SH" + - "carbon_dioxide_NH" + - "carbon_dioxide_GM" + - "calyear" + - "calmonth" + - "calday" + - "c_c4f8_SH" + - "c_c4f8_NH" + - "c_c4f8_GM" + - "c8f18_SH" + - "c8f18_NH" + - "c8f18_GM" + - "c7f16_SH" + - "c7f16_NH" + - "c7f16_GM" + - "c6f14_SH" + - "c6f14_NH" + - "c6f14_GM" + - "c5f12_SH" + - "c5f12_NH" + - "c5f12_GM" + - "c4per_to_urban" + - "c4per_to_secdn" + - "c4per_to_secdf" + - "c4per_to_range" + - "c4per_to_pastr" + - "c4per_to_c4ann" + - "c4per_to_c3per" + - "c4per_to_c3nfx" + - "c4per_to_c3ann" + - "c4per" + - "c4f10_SH" + - "c4f10_NH" + - "c4f10_GM" + - "c4ann_to_urban" + - "c4ann_to_secdn" + - "c4ann_to_secdf" + - "c4ann_to_range" + - "c4ann_to_pastr" + - "c4ann_to_c4per" + - "c4ann_to_c3per" + - "c4ann_to_c3nfx" + - "c4ann_to_c3ann" + - "c4ann" + - "c3per_to_urban" + - "c3per_to_secdn" + - "c3per_to_secdf" + - "c3per_to_range" + - "c3per_to_pastr" + - "c3per_to_c4per" + - "c3per_to_c4ann" + - "c3per_to_c3nfx" + - "c3per_to_c3ann" + - "c3per" + - "c3nfx_to_urban" + - "c3nfx_to_secdn" + - "c3nfx_to_secdf" + - "c3nfx_to_range" + - 
"c3nfx_to_pastr" + - "c3nfx_to_c4per" + - "c3nfx_to_c4ann" + - "c3nfx_to_c3per" + - "c3nfx_to_c3ann" + - "c3nfx" + - "c3f8_SH" + - "c3f8_NH" + - "c3f8_GM" + - "c3ann_to_urban" + - "c3ann_to_secdn" + - "c3ann_to_secdf" + - "c3ann_to_range" + - "c3ann_to_pastr" + - "c3ann_to_c4per" + - "c3ann_to_c4ann" + - "c3ann_to_c3per" + - "c3ann_to_c3nfx" + - "c3ann" + - "c2f6_SH" + - "c2f6_NH" + - "c2f6_GM" + - "bounds_time" + - "bounds_sector" + - "bounds_latitude" + - "bounds_altitude" + - "beta_b" + - "beta_a" + - "asy550" + - "asl" + - "areacello" + - "areacellg" + - "areacella" + - "ap" + - "aod_spmx" + - "aod_fmbg" + - "ann_cycle" + - "angstrom" + - "amv_index" + - "altitude" + - "added_tree_cover" + - "acabf" + - "WST" + - "VOC_openburning_share" + - "VOC_em_openburning" + - "VOC_em_anthro" + - "VOC_em_AIR_anthro" + - "VOC25_other_voc_em_speciated_VOC_anthro" + - "VOC25_other_voc_em_speciated_VOC" + - "VOC25-other_voc_em_speciated_VOC" + - "VOC24_acids_em_speciated_VOC_anthro" + - "VOC24_acids_em_speciated_VOC" + - "VOC24-acids_em_speciated_VOC" + - "VOC23_ketones_em_speciated_VOC_anthro" + - "VOC23_ketones_em_speciated_VOC" + - "VOC23-ketones_em_speciated_VOC" + - "VOC22_other_alka_em_speciated_VOC_anthro" + - "VOC22_other_alka_em_speciated_VOC" + - "VOC22-other_alka_em_speciated_VOC" + - "VOC21_methanal_em_speciated_VOC_anthro" + - "VOC21_methanal_em_speciated_VOC" + - "VOC21-methanal_em_speciated_VOC" + - "VOC20_chlorinate_em_speciated_VOC_anthro" + - "VOC20_chlorinate_em_speciated_VOC" + - "VOC20-chlorinate_em_speciated_VOC" + - "VOC19_ethers_em_speciated_VOC_anthro" + - "VOC19_ethers_em_speciated_VOC" + - "VOC19-ethers_em_speciated_VOC" + - "VOC18_esters_em_speciated_VOC_anthro" + - "VOC18_esters_em_speciated_VOC" + - "VOC18-esters_em_speciated_VOC" + - "VOC17_other_arom_em_speciated_VOC_anthro" + - "VOC17_other_arom_em_speciated_VOC" + - "VOC17-other_arom_em_speciated_VOC" + - "VOC16_trimethylb_em_speciated_VOC_anthro" + - "VOC16_trimethylb_em_speciated_VOC" + - 
"VOC16-trimethylb_em_speciated_VOC" + - "VOC15_xylene_em_speciated_VOC_anthro" + - "VOC15_xylene_em_speciated_VOC" + - "VOC15-xylene_em_speciated_VOC" + - "VOC14_toluene_em_speciated_VOC_anthro" + - "VOC14_toluene_em_speciated_VOC" + - "VOC14-toluene_em_speciated_VOC" + - "VOC13_benzene_em_speciated_VOC_anthro" + - "VOC13_benzene_em_speciated_VOC" + - "VOC13-benzene_em_speciated_VOC" + - "VOC12_other_alke_em_speciated_VOC_anthro" + - "VOC12_other_alke_em_speciated_VOC" + - "VOC12-other_alke_em_speciated_VOC" + - "VOC09_ethyne_em_speciated_VOC_anthro" + - "VOC09_ethyne_em_speciated_VOC" + - "VOC09-ethyne_em_speciated_VOC" + - "VOC08_propene_em_speciated_VOC_anthro" + - "VOC08_propene_em_speciated_VOC" + - "VOC08-propene_em_speciated_VOC" + - "VOC07_ethene_em_speciated_VOC_anthro" + - "VOC07_ethene_em_speciated_VOC" + - "VOC07-ethene_em_speciated_VOC" + - "VOC06_hexanes_pl_em_speciated_VOC_anthro" + - "VOC06_hexanes_pl_em_speciated_VOC" + - "VOC06-hexanes_pl_em_speciated_VOC" + - "VOC05_pentanes_em_speciated_VOC_anthro" + - "VOC05_pentanes_em_speciated_VOC" + - "VOC05-pentanes_em_speciated_VOC" + - "VOC04_butanes_em_speciated_VOC_anthro" + - "VOC04_butanes_em_speciated_VOC" + - "VOC04-butanes_em_speciated_VOC" + - "VOC03_propane_em_speciated_VOC_anthro" + - "VOC03_propane_em_speciated_VOC" + - "VOC03-propane_em_speciated_VOC" + - "VOC02_ethane_em_speciated_VOC_anthro" + - "VOC02_ethane_em_speciated_VOC" + - "VOC02-ethane_em_speciated_VOC" + - "VOC01_alcohols_em_speciated_VOC_anthro" + - "VOC01_alcohols_em_speciated_VOC" + - "VOC01-alcohols_em_speciated_VOC" + - "Toluene_lump" + - "TRA" + - "SO2_openburning_share" + - "SO2_em_openburning" + - "SO2_em_anthro" + - "SO2_em_SOLID_BIOFUEL_anthro" + - "SO2_em_AIR_anthro" + - "SO2" + - "SLV" + - "SHP" + - "RSLossRem" + - "RCO" + - "OC_openburning_share" + - "OC_em_openburning" + - "OC_em_anthro" + - "OC_em_SOLID_BIOFUEL_anthro" + - "OC_em_AIR_anthro" + - "OC" + - "NOx_openburning_share" + - "NOx_em_openburning" + - 
"NOx_em_anthro" + - "NOx_em_SOLID_BIOFUEL_anthro" + - "NOx_em_AIR_anthro" + - "NOx" + - "NMVOC_openburning_share" + - "NMVOC_em_openburning" + - "NMVOC_em_anthro" + - "NMVOC_em_SOLID_BIOFUEL_anthro" + - "NMVOC_em_AIR_anthro" + - "NMVOC_Toluene_lump_speciated_VOC_openburning_share" + - "NMVOC_Toluene_lump_em_speciated_VOC_openburning" + - "NMVOC_MEK_speciated_VOC_openburning_share" + - "NMVOC_MEK_em_speciated_VOC_openburning" + - "NMVOC_Higher_Alkenes_speciated_VOC_openburning_share" + - "NMVOC_Higher_Alkenes_em_speciated_VOC_openburning" + - "NMVOC_Higher_Alkanes_speciated_VOC_openburning_share" + - "NMVOC_Higher_Alkanes_em_speciated_VOC_openburning" + - "NMVOC_HOCH2CHO_speciated_VOC_openburning_share" + - "NMVOC_HOCH2CHO_em_speciated_VOC_openburning" + - "NMVOC_HCOOH_speciated_VOC_openburning_share" + - "NMVOC_HCOOH_em_speciated_VOC_openburning" + - "NMVOC_HCN_speciated_VOC_openburning_share" + - "NMVOC_HCN_em_speciated_VOC_openburning" + - "NMVOC_CH3OH_speciated_VOC_openburning_share" + - "NMVOC_CH3OH_em_speciated_VOC_openburning" + - "NMVOC_CH3COOH_speciated_VOC_openburning_share" + - "NMVOC_CH3COOH_em_speciated_VOC_openburning" + - "NMVOC_CH3COCHO_speciated_VOC_openburning_share" + - "NMVOC_CH3COCHO_em_speciated_VOC_openburning" + - "NMVOC_CH2O_speciated_VOC_openburning_share" + - "NMVOC_CH2O_em_speciated_VOC_openburning" + - "NMVOC_C8H10_speciated_VOC_openburning_share" + - "NMVOC_C8H10_em_speciated_VOC_openburning" + - "NMVOC_C7H8_speciated_VOC_openburning_share" + - "NMVOC_C7H8_em_speciated_VOC_openburning" + - "NMVOC_C6H6_speciated_VOC_openburning_share" + - "NMVOC_C6H6_em_speciated_VOC_openburning" + - "NMVOC_C5H8_speciated_VOC_openburning_share" + - "NMVOC_C5H8_em_speciated_VOC_openburning" + - "NMVOC_C3H8_speciated_VOC_openburning_share" + - "NMVOC_C3H8_em_speciated_VOC_openburning" + - "NMVOC_C3H6_speciated_VOC_openburning_share" + - "NMVOC_C3H6_em_speciated_VOC_openburning" + - "NMVOC_C3H6O_speciated_VOC_openburning_share" + - 
"NMVOC_C3H6O_em_speciated_VOC_openburning" + - "NMVOC_C2H6_speciated_VOC_openburning_share" + - "NMVOC_C2H6_em_speciated_VOC_openburning" + - "NMVOC_C2H6S_speciated_VOC_openburning_share" + - "NMVOC_C2H6S_em_speciated_VOC_openburning" + - "NMVOC_C2H5OH_speciated_VOC_openburning_share" + - "NMVOC_C2H5OH_em_speciated_VOC_openburning" + - "NMVOC_C2H4_speciated_VOC_openburning_share" + - "NMVOC_C2H4_em_speciated_VOC_openburning" + - "NMVOC_C2H4O_speciated_VOC_openburning_share" + - "NMVOC_C2H4O_em_speciated_VOC_openburning" + - "NMVOC_C2H2_speciated_VOC_openburning_share" + - "NMVOC_C2H2_em_speciated_VOC_openburning" + - "NMVOC_C10H16_speciated_VOC_openburning_share" + - "NMVOC_C10H16_em_speciated_VOC_openburning" + - "NMVOC" + - "NH3_openburning_share" + - "NH3_em_openburning" + - "NH3_em_anthro" + - "NH3_em_SOLID_BIOFUEL_anthro" + - "NH3_em_AIR_anthro" + - "NH3" + - "N2O" + - "MEK" + - "IND" + - "Higher_Alkenes" + - "Higher_Alkanes" + - "HOCH2CHO" + - "HCOOH" + - "HCN" + - "H2_openburning_share" + - "H2_em_openburning" + - "H2SO4_mass" + - "H2" + - "ENE" + - "Delta14co2_in_air" + - "CO_openburning_share" + - "CO_em_openburning" + - "CO_em_anthro" + - "CO_em_SOLID_BIOFUEL_anthro" + - "CO_em_AIR_anthro" + - "CO2_em_anthro" + - "CO2_em_AIR_anthro" + - "CO2" + - "CO" + - "CH4_openburning_share" + - "CH4_em_openburning" + - "CH4_em_anthro" + - "CH4_em_SOLID_BIOFUEL_anthro" + - "CH4_em_AIR_anthro" + - "CH4" + - "CH3OH" + - "CH3COOH" + - "CH3COCHO" + - "CH2O" + - "C8H10" + - "C7H8" + - "C6H6" + - "C5H8" + - "C3H8" + - "C3H6O" + - "C3H6" + - "C2H6S" + - "C2H6" + - "C2H5OH" + - "C2H4O" + - "C2H4" + - "C2H2" + - "C10H16" + - "BC_openburning_share" + - "BC_em_openburning" + - "BC_em_anthro" + - "BC_em_SOLID_BIOFUEL_anthro" + - "BC_em_AIR_anthro" + - "BC" + - "AIR" + - "AGR" diff --git a/configs/downloader/future_usecases/cmip7.yaml b/configs/downloader/future_usecases/cmip7.yaml index 1a31530..100460e 100644 --- a/configs/downloader/future_usecases/cmip7.yaml +++ 
b/configs/downloader/future_usecases/cmip7.yaml @@ -1,4 +1,4 @@ -models: ["HasGEM3-GC31-LL"] -downloader_kwargs: - variables: ["mrsofc"] - experiments: ["hist-lu"] \ No newline at end of file +CMIP6Plus: + models: ["HasGEM3-GC31-LL"] + variables: [ "mrsofc" ] + experiments: [ "hist-lu" ] diff --git a/configs/downloader/future_usecases/e3sm.yaml b/configs/downloader/future_usecases/e3sm.yaml index 56d4fc6..16e9bdb 100644 --- a/configs/downloader/future_usecases/e3sm.yaml +++ b/configs/downloader/future_usecases/e3sm.yaml @@ -1,4 +1,3 @@ -models: ["E3SM"] -downloader_kwargs: - variables: ["???"] - experiments: ["ssp585"] \ No newline at end of file +E3SM: + variables: [ "???" ] + experiments: [ "ssp585" ] \ No newline at end of file diff --git a/configs/downloader/future_usecases/noresm_ocean.yaml b/configs/downloader/future_usecases/noresm_ocean.yaml new file mode 100644 index 0000000..76b58d6 --- /dev/null +++ b/configs/downloader/future_usecases/noresm_ocean.yaml @@ -0,0 +1,5 @@ +OMIP: + models: ["NorESM2-LM"] + variables: [ "omldamax" ] # mean daily-maximum ocean mixed-layer thickness (sea-surface temperature would be "tos") + experiments: [ "omip1" ] + ensemble_members: [ "r1i1p1f1" ] \ No newline at end of file diff --git a/configs/downloader/future_usecases/obs4mips.yaml b/configs/downloader/future_usecases/obs4mips.yaml index 8f7f853..9441903 100644 --- a/configs/downloader/future_usecases/obs4mips.yaml +++ b/configs/downloader/future_usecases/obs4mips.yaml @@ -1,4 +1,4 @@ -models: ["ESACCI-CLOUD-ATSR2-AATSR-3-0"] -downloader_kwargs: +#ESACCI-CLOUD-ATSR2-AATSR-3-0: +obs4MIPs: variables: ["pctCCI"] experiments: [""] \ No newline at end of file diff --git a/configs/downloader/input4mips/bc_historical.yaml b/configs/downloader/input4mips/bc_historical.yaml index 717e0c2..194b790 100644 --- a/configs/downloader/input4mips/bc_historical.yaml +++ b/configs/downloader/input4mips/bc_historical.yaml @@ -1,4 +1,3 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["BC"] - 
experiments: ["historical"] \ No newline at end of file +input4MIPs: + variables: [ "BC" ] + experiments: [ "historical" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/bc_ssp.yaml b/configs/downloader/input4mips/bc_ssp.yaml index 1608f92..2d88ff6 100644 --- a/configs/downloader/input4mips/bc_ssp.yaml +++ b/configs/downloader/input4mips/bc_ssp.yaml @@ -1,4 +1,3 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["BC"] - experiments: ["ssp585"] \ No newline at end of file +input4MIPs: + variables: [ "BC" ] + experiments: [ "ssp585" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/ch4_historical.yaml b/configs/downloader/input4mips/ch4_historical.yaml index 1e5ad30..00784f7 100644 --- a/configs/downloader/input4mips/ch4_historical.yaml +++ b/configs/downloader/input4mips/ch4_historical.yaml @@ -1,4 +1,3 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["CH4"] - experiments: ["historical"] \ No newline at end of file +input4MIPs: + variables: [ "CH4" ] + experiments: [ "historical" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/ch4_ssp.yaml b/configs/downloader/input4mips/ch4_ssp.yaml index 4282283..e003e77 100644 --- a/configs/downloader/input4mips/ch4_ssp.yaml +++ b/configs/downloader/input4mips/ch4_ssp.yaml @@ -1,4 +1,3 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["CH4"] - experiments: ["ssp245"] \ No newline at end of file +input4MIPs: + variables: [ "CH4" ] + experiments: [ "ssp245" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/co2_historical.yaml b/configs/downloader/input4mips/co2_historical.yaml index c415eb8..28a7f8f 100644 --- a/configs/downloader/input4mips/co2_historical.yaml +++ b/configs/downloader/input4mips/co2_historical.yaml @@ -1,4 +1,3 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["CO2"] - experiments: ["historical"] \ No newline at end of file +input4MIPs: + variables: [ "CO2" ] + experiments: [ 
"historical" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/co2_ssp.yaml b/configs/downloader/input4mips/co2_ssp.yaml index 2a166d4..1646a9d 100644 --- a/configs/downloader/input4mips/co2_ssp.yaml +++ b/configs/downloader/input4mips/co2_ssp.yaml @@ -1,4 +1,3 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["C02"] - experiments: ["ssp460"] \ No newline at end of file +input4MIPs: + variables: [ "CO2" ] + experiments: [ "ssp460" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/so2_historical.yaml b/configs/downloader/input4mips/so2_historical.yaml index f639cd3..9be5c8d 100644 --- a/configs/downloader/input4mips/so2_historical.yaml +++ b/configs/downloader/input4mips/so2_historical.yaml @@ -1,4 +1,3 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["SO2"] - experiments: ["historical"] \ No newline at end of file +input4MIPs: + variables: [ "SO2" ] + experiments: [ "historical" ] \ No newline at end of file diff --git a/configs/downloader/input4mips/so2_ssp.yaml b/configs/downloader/input4mips/so2_ssp.yaml index 56c5b5b..683b597 100644 --- a/configs/downloader/input4mips/so2_ssp.yaml +++ b/configs/downloader/input4mips/so2_ssp.yaml @@ -1,4 +1,3 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["SO2"] - experiments: ["ssp370"] \ No newline at end of file +input4MIPs: + variables: [ "SO2" ] + experiments: [ "ssp370" ] \ No newline at end of file diff --git a/configs/fgoals_minimal.yaml b/configs/fgoals_minimal.yaml deleted file mode 100644 index 145fa02..0000000 --- a/configs/fgoals_minimal.yaml +++ /dev/null @@ -1,4 +0,0 @@ -models: ["FGOALS-g3"] -downloader_kwargs: - variables: ["tas"] - experiments: ["ssp370"] \ No newline at end of file diff --git a/configs/minimal_dataset.yaml b/configs/minimal_dataset.yaml index af4c0c0..f53da4f 100644 --- a/configs/minimal_dataset.yaml +++ b/configs/minimal_dataset.yaml @@ -1,9 +1,9 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: 
["tas", "CH4", "CO2"] - experiments: ["historical", "ssp126"] +CMIP6: + models: [ "NorESM2-LM" ] + variables: [ "tas" ] + experiments: [ "historical", "ssp126" ] max_ensemble_members: 1 - overwrite: true - download_biomassburning: false - start_year: 1990 - end_year: 2030 \ No newline at end of file + ensemble_members: ["r2i1p1f1"] +input4MIPs: + variables: [ "CH4", "CO2" ] + experiments: [ "historical","ssp126" ] \ No newline at end of file diff --git a/climateset/download/constants/selected_scenariosMIPs.json b/docs/selected_scenariosMIPs.json similarity index 100% rename from climateset/download/constants/selected_scenariosMIPs.json rename to docs/selected_scenariosMIPs.json diff --git a/noxfile.py b/noxfile.py index 42460cb..c55478b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,22 +1,28 @@ +import re from pathlib import Path import nox +ARG_RE = re.compile(r"^-[-\w=]+$") # matches option-style args only, e.g. "-k", "--maxfail=1"; bare values such as "tests/foo.py" do not match + nox.options.reuse_existing_virtualenvs = True # Reuse virtual environments nox.options.sessions = ["precommit"] def get_paths(session): package_path = Path(session.bin).parent.parent.parent + main_package = package_path / "climateset" + tests = package_path / "tests" + scripts = package_path / "scripts" return { "all": [ - package_path / "climateset", - package_path / "tests", - package_path / "scripts", + main_package, + tests, + scripts, ], "module": [ - package_path / "climateset", - package_path / "scripts", + main_package, + scripts, ], } @@ -38,6 +44,12 @@ def flake8(session): session.run("poetry", "run", "flake8", *paths["all"], external=True) +@nox.session() +def complexity(session): + paths = get_paths(session) + session.run("poetry", "run", "flake8", "--max-complexity", "7", *paths["all"], external=True) + + @nox.session() def docformatter(session): paths = get_paths(session) @@ -74,6 +86,8 @@ def check(session): @nox.session() def fix(session): paths = get_paths(session) + session.run("poetry", "run", "autoflake", "-v", *paths["all"], 
external=True) + session.run("poetry", "run", "autopep8", *paths["all"], external=True) session.run("poetry", "run", "black", *paths["all"], external=True) session.run("poetry", "run", "isort", *paths["all"], external=True) session.run("poetry", "run", "flynt", *paths["all"], external=True) @@ -94,6 +108,18 @@ def precommit(session): session.run("poetry", "run", "pre-commit", "run", "--all-files", external=True) +@nox.session() +def autoflake(session): + paths = get_paths(session) + session.run("poetry", "run", "autoflake", "-v", *paths["all"], external=True) + + +@nox.session() +def autopep(session): + paths = get_paths(session) + session.run("poetry", "run", "autopep8", *paths["all"], external=True) + + @nox.session() def black(session): paths = get_paths(session) @@ -112,6 +138,24 @@ def flynt(session): session.run("poetry", "run", "flynt", *paths["all"], external=True) +@nox.session(name="ruff-lint") +def ruff_lint(session): + paths = get_paths(session) + session.run("poetry", "run", "ruff", "check", *paths["all"], external=True) + + +@nox.session(name="ruff-fix") +def ruff_fix(session): + paths = get_paths(session) + session.run("poetry", "run", "ruff", "check", "--fix", *paths["all"], external=True) + + +@nox.session(name="ruff-format") +def ruff_format(session): + paths = get_paths(session) + session.run("poetry", "run", "ruff", "format", *paths["all"], external=True) + + @nox.session() def test(session): session.run("poetry", "run", "pytest", external=True) @@ -119,8 +163,12 @@ def test(session): @nox.session() def test_custom(session): + for a in session.posargs: + if not ARG_RE.match(a): + session.error(f"unsafe pytest argument detected: {a!r}") + session.run( - "poetry", "run", "pytest", external=True, *session.posargs + "poetry", "run", "python", "-m", "pytest", external=True, *session.posargs ) # Pass additional arguments directly to pytest diff --git a/poetry.lock b/poetry.lock index bf4e591..99a465e 100644 --- a/poetry.lock +++ b/poetry.lock @@ 
-210,6 +210,36 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +[[package]] +name = "autoflake" +version = "2.3.1" +description = "Removes unused imports and unused variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "autoflake-2.3.1-py3-none-any.whl", hash = "sha256:3ae7495db9084b7b32818b4140e6dc4fc280b712fb414f5b8fe57b0a8e85a840"}, + {file = "autoflake-2.3.1.tar.gz", hash = "sha256:c98b75dc5b0a86459c4f01a1d32ac7eb4338ec4317a4469515ff1e687ecd909e"}, +] + +[package.dependencies] +pyflakes = ">=3.0.0" +tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} + +[[package]] +name = "autopep8" +version = "2.3.2" +description = "A tool that automatically formats Python code to conform to the PEP 8 style guide" +optional = false +python-versions = ">=3.9" +files = [ + {file = "autopep8-2.3.2-py2.py3-none-any.whl", hash = "sha256:ce8ad498672c845a0c3de2629c15b635ec2b05ef8177a6e7c91c74f3e9b51128"}, + {file = "autopep8-2.3.2.tar.gz", hash = "sha256:89440a4f969197b69a995e4ce0661b031f455a9f776d2c5ba3dbd83466931758"}, +] + +[package.dependencies] +pycodestyle = ">=2.12.0" +tomli = {version = "*", markers = "python_version < \"3.11\""} + [[package]] name = "babel" version = "2.14.0" @@ -925,18 +955,18 @@ typing = ["typing-extensions (>=4.8)"] [[package]] name = "flake8" -version = "7.0.0" +version = "7.1.2" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" files = [ - {file = "flake8-7.0.0-py2.py3-none-any.whl", hash = "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3"}, - {file = "flake8-7.0.0.tar.gz", hash = "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132"}, + {file = "flake8-7.1.2-py2.py3-none-any.whl", hash = 
"sha256:1cbc62e65536f65e6d754dfe6f1bada7f5cf392d6f5db3c2b85892466c3e7c1a"}, + {file = "flake8-7.1.2.tar.gz", hash = "sha256:c586ffd0b41540951ae41af572e6790dbd49fc12b3aa2541685d253d9bd504bd"}, ] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.11.0,<2.12.0" +pycodestyle = ">=2.12.0,<2.13.0" pyflakes = ">=3.2.0,<3.3.0" [[package]] @@ -2448,13 +2478,13 @@ tests = ["pytest"] [[package]] name = "pycodestyle" -version = "2.11.1" +version = "2.12.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" files = [ - {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, - {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, + {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"}, + {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"}, ] [[package]] @@ -3175,6 +3205,33 @@ files = [ {file = "rpds_py-0.18.0.tar.gz", hash = "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d"}, ] +[[package]] +name = "ruff" +version = "0.11.11" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.11.11-py3-none-linux_armv6l.whl", hash = "sha256:9924e5ae54125ed8958a4f7de320dab7380f6e9fa3195e3dc3b137c6842a0092"}, + {file = "ruff-0.11.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:c8a93276393d91e952f790148eb226658dd275cddfde96c6ca304873f11d2ae4"}, + {file = "ruff-0.11.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6e333dbe2e6ae84cdedefa943dfd6434753ad321764fd937eef9d6b62022bcd"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7885d9a5e4c77b24e8c88aba8c80be9255fa22ab326019dac2356cff42089fc6"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1b5ab797fcc09121ed82e9b12b6f27e34859e4227080a42d090881be888755d4"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e231ff3132c1119ece836487a02785f099a43992b95c2f62847d29bace3c75ac"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:a97c9babe1d4081037a90289986925726b802d180cca784ac8da2bbbc335f709"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8c4ddcbe8a19f59f57fd814b8b117d4fcea9bee7c0492e6cf5fdc22cfa563c8"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6224076c344a7694c6fbbb70d4f2a7b730f6d47d2a9dc1e7f9d9bb583faf390b"}, + {file = "ruff-0.11.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:882821fcdf7ae8db7a951df1903d9cb032bbe838852e5fc3c2b6c3ab54e39875"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:dcec2d50756463d9df075a26a85a6affbc1b0148873da3997286caf1ce03cae1"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:99c28505ecbaeb6594701a74e395b187ee083ee26478c1a795d35084d53ebd81"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:9263f9e5aa4ff1dec765e99810f1cc53f0c868c5329b69f13845f699fe74f639"}, + {file = "ruff-0.11.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:64ac6f885e3ecb2fdbb71de2701d4e34526651f1e8503af8fb30d4915a3fe345"}, + {file = "ruff-0.11.11-py3-none-win32.whl", hash = "sha256:1adcb9a18802268aaa891ffb67b1c94cd70578f126637118e8099b8e4adcf112"}, + {file = "ruff-0.11.11-py3-none-win_amd64.whl", hash = "sha256:748b4bb245f11e91a04a4ff0f96e386711df0a30412b9fe0c74d5bdc0e4a531f"}, + {file = "ruff-0.11.11-py3-none-win_arm64.whl", hash = "sha256:6c51f136c0364ab1b774767aa8b86331bd8e9d414e2d107db7a2189f35ea1f7b"}, + {file = "ruff-0.11.11.tar.gz", hash = "sha256:7774173cc7c1980e6bf67569ebb7085989a78a103922fb83ef3dfe230cd0687d"}, +] + [[package]] name = "scipy" version = "1.13.1" @@ -3838,4 +3895,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "f022cd016ae910e8f019261294ca552fd34ebbc217b804d9be4aa5fe24d1446d" +content-hash = "df5d5315e96bcaea935280b93b4373244fdfbfe27df154f149483351d1d3642e" diff --git a/pyproject.toml b/pyproject.toml index 66f0c2f..8865508 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,10 @@ pre-commit = "^3.7.0" flake8-pyproject = "^1.2.3" black = "^24.4.2" nox = "^2024.4.15" -docformatter = {extras = ["tomli"], version = "^1.7.5"} +docformatter = { extras = ["tomli"], version = "^1.7.5" } +autoflake = "^2.3.1" +autopep8 = "^2.3.2" +ruff = "^0.11.11" [tool.poetry.group.lab.dependencies] jupyterlab = "^4.0.10" @@ -98,21 +101,26 @@ replace = ''' ''' [tool.pylint] -disable = "C0114,C0115,C0116,R0903,R1710,W1203,W0511,W0718,C0302" +disable = [ + "C0114", + "C0115", + "C0116", + "R0903", + "R1710", + "W1203", + "W0511", # TO DO warnings +] max-line-length = 120 max-locals = 20 max-args = 16 +max-positional-arguments = 16 max-attributes = 20 -ignore = [ - ".git", - "migrations", - "__pycache__" -] [tool.flake8] max-line-length = 120 
ignore = ["E203", "E266", "E501", "W503"] -max-complexity = 20 +docstring-convention = "numpy" +max-complexity = 18 per-file-ignores = [] exclude = [ ".git", @@ -120,6 +128,20 @@ exclude = [ "__pycache__" ] +[tool.autoflake] +remove-all-unused-imports = true +in-place = true +ignore-init-module-imports = true +remove-unused-variables = true +recursive = true + +[tool.autopep8] +max_line_length = 120 +in-place = true +aggressive = 2 +ignore = ["W503", "E203", "E501"] +recursive = true + [tool.black] line-length = 120 target-version = ["py311"] @@ -132,17 +154,72 @@ line-length = 120 transform-concats = true verbose = true -[tool.pytest.ini_options] -testpaths = [ - "tests", -] -markers = ["offline: mark a test as needing to be run offline.", ] - [tool.docformatter] -style = "google" +style = "numpy" pre-summary-newline = true wrap-descriptions = 120 wrap-summaries = 120 blank = false exclude = [] recursive = true + +[tool.ruff] +line-length = 120 + +target-version = "py311" + +exclude = [ + ".git", + "migrations", + "__pycache__", +] + +[tool.ruff.lint] +select = [ + "A", # Flake8 Built ins + "E", # Error (Flake8) + "F", # Pyflakes (Flake8) + "W", # Warning (Flake8) + "I", # isort (import sorting) + "N", # Naming conventions (Pylint, etc.) 
+ "C90", # mccabe complexity (replaces flake8 --max-complexity and mccabe) + "B", # Bugbear (common linting issues) + "UP", # pyupgrade (suggests modern Python syntax) + "PLR", # Pylint refactor + "PLE", # Pylint error (specific Pylint error rules) + "PLW", # Pylint warning (specific Pylint warning rules) + "PLC", # Pylint convention (specific Pylint convention rules) + "R", # Refactor (Pylint refactoring suggestions) + "TID", # flake8-tidy-imports (banned and relative imports) + "FAST",# FastAPI + "C4", # List and dict comprehensions + "DJ", # Django + "PIE", # Returns and unnecessary returns + "Q", # Double quotes + "RET", # Fix return statements + "PTH", # Enforce pathlib + "ARG", # Unused argument + "FLY", # Flynt + "NPY", # Numpy specific + "PD", # Pandas specific + "RUF", # Ruff specific +] + +ignore = [ + "E203", # whitespace before ':', Black already handles this + "E266", # too many leading '#' for comments + "E501", # line too long (we enforce via line-length instead) + "RET504", + "RUF013", + "PTH123" +] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" # Corresponds to flake8's docstring-convention and docformatter style. + +[tool.ruff.lint.pylint] +max-args = 16 + +[tool.ruff.lint.mccabe] +# cyclomatic complexity +max-complexity = 18 diff --git a/scripts/download_example.py b/scripts/download_example.py new file mode 100644 index 0000000..733fa67 --- /dev/null +++ b/scripts/download_example.py @@ -0,0 +1,71 @@ +import typer + +from climateset import CONFIGS +from climateset.download import download_from_config_file, downloader_config +from climateset.download.cmip6_downloader import CMIP6Downloader +from climateset.download.input4mips_downloader import Input4MipsDownloader + +app = typer.Typer(no_args_is_help=True) + +CONFIG_PATH = CONFIGS / "minimal_dataset.yaml" + + +@app.command( + name="download-basic", + help="Download ClimateSet data by building the config objects.
See function content for more details.", +) +def basic_download(): + """ + By default, will download to the DATA_DIR folder. You can override this behavior modifying the config objects or by + adding the `data_dir` key in the config file under each project. + + ex. + CMIP6: + models: [ "NorESM2-LM" ] + variables: [ "tas" ] + experiments: [ "historical", "ssp126" ] + data_dir: "PATH/TO/DATA_DIR" + input4MIPs: + variables: [ "CO2", "CH4" ] + experiments: [ "historical","ssp126" ] + data_dir: "PATH/TO/DATA_DIR" + """ + input4mips_config = downloader_config.create_input4mips_downloader_config_from_file(CONFIG_PATH) + cmip6_config = downloader_config.create_cmip6_downloader_config_from_file(CONFIG_PATH) + + # If you want to specify where data will be downloaded, change the following: + # input4mips_config.data_dir = "PATH_TO_DATA_DIR" + # cmip6_config.data_dir = "PATH_TO_DATA_DIR" + + input4mips_downloader = Input4MipsDownloader(input4mips_config) + input4mips_downloader.download() + + cmip6_downloader = CMIP6Downloader(cmip6_config) + cmip6_downloader.download() + + +@app.command( + name="download-from-config", + help="Download ClimateSet data via download_from_config_file() function. See function content for more details.", +) +def alternative_approach(): + """ + By default, will download to the DATA_DIR folder. You can override this behavior by adding the `data_dir` key in the + config file under each project. + + ex. 
+ CMIP6: + models: [ "NorESM2-LM" ] + variables: [ "tas" ] + experiments: [ "historical", "ssp126" ] + data_dir: "PATH/TO/DATA_DIR" + input4MIPs: + variables: [ "CO2", "CH4" ] + experiments: [ "historical","ssp126" ] + data_dir: "PATH/TO/DATA_DIR" + """ + download_from_config_file(CONFIG_PATH) + + +if __name__ == "__main__": + app() diff --git a/tests/resources/test_minimal_dataset.yaml b/tests/resources/test_minimal_dataset.yaml index 86bf9c0..a09f942 100644 --- a/tests/resources/test_minimal_dataset.yaml +++ b/tests/resources/test_minimal_dataset.yaml @@ -1,4 +1,7 @@ -models: ["NorESM2-LM"] -downloader_kwargs: - variables: ["tas", "CO2", "CH4"] - experiments: ["historical", "ssp126"] \ No newline at end of file +CMIP6: + models: ["NorESM2-LM"] + variables: ["tas"] + experiments: ["ssp126"] +input4MIPs: + variables: ["CO2", "CH4"] + experiments: ["historical","ssp126"] \ No newline at end of file diff --git a/tests/test_download/test_downloader.py b/tests/test_download/test_downloader.py index 83a25ae..b6223bb 100644 --- a/tests/test_download/test_downloader.py +++ b/tests/test_download/test_downloader.py @@ -1,18 +1,29 @@ +import shutil from unittest.mock import call, patch import pytest from climateset import TEST_DIR -from climateset.download.downloader import Downloader, download_from_config_file -from climateset.utils import get_yaml_config +from climateset.download.cmip6_downloader import CMIP6Downloader +from climateset.download.constants.esgf import CMIP6, INPUT4MIPS +from climateset.download.downloader import download_from_config_file +from climateset.download.downloader_config import ( + create_cmip6_downloader_config_from_file, + create_input4mips_downloader_config_from_file, +) +from climateset.download.input4mips_downloader import Input4MipsDownloader MINIMAL_DATASET_CONFIG_PATH = TEST_DIR / "resources/test_minimal_dataset.yaml" +TEST_TMP_DIR = TEST_DIR / "resources/.tmp" +MAX_ENSEMBLE_MEMBERS = 10 -DOWNLOAD_RAW_INPUT_SINGLE_VAR = 
"climateset.download.downloader.Downloader.download_raw_input_single_var" +DOWNLOAD_RAW_INPUT_SINGLE_VAR = ( + "climateset.download.input4mips_downloader.Input4MipsDownloader.download_raw_input_single_var" +) DOWNLOAD_META_HISTORIC_SINGLE_VAR = ( - "climateset.download.downloader.Downloader.download_meta_historic_biomassburning_single_var" + "climateset.download.input4mips_downloader.Input4MipsDownloader.download_meta_historic_biomassburning_single_var" ) -DOWNLOAD_MODEL_SINGLE_VAR = "climateset.download.downloader.Downloader.download_from_model_single_var" +DOWNLOAD_MODEL_SINGLE_VAR = "climateset.download.cmip6_downloader.CMIP6Downloader.download_from_model_single_var" SUBPROCESS_RUN = "subprocess.run" EXPECTED_MINIMAL_RAW_INPUT_CALLS = [ @@ -25,6 +36,7 @@ call(variable="CH4", institution_id="VUA"), call(variable="CH4_openburning_share", institution_id="IAMC"), ] +RAW_INPUT_NUM_OF_CALLS = 8 EXPECTED_MINIMAL_META_HISTORIC_CALLS = [ call(variable="CH4_percentage_AGRI", institution_id="VUA"), @@ -34,11 +46,16 @@ call(variable="CH4_percentage_SAVA", institution_id="VUA"), call(variable="CH4_percentage_TEMF", institution_id="VUA"), ] +META_HISTORIC_NUM_OF_CALLS = 6 EXPECTED_MINIMAL_MODEL_CALLS = [ - call(variable="tas", experiment="historical"), - call(variable="tas", experiment="ssp126"), + call(model="NorESM2-LM", project="CMIP6", variable="tas", experiment="ssp126"), ] +MODEL_SINGLE_NUM_OF_CALLS = 1 + + +def delete_tmp_dir(): + shutil.rmtree(TEST_TMP_DIR, ignore_errors=True) @pytest.fixture() @@ -66,38 +83,51 @@ def mock_subprocess_run(): @pytest.fixture -def simple_downloader_object(): +def input4mips_downloader_object(): config_source = MINIMAL_DATASET_CONFIG_PATH - config = get_yaml_config(config_source) - model = config["models"][0] - downloader_kwargs = config["downloader_kwargs"] - return Downloader(model=model, **downloader_kwargs) + config = create_input4mips_downloader_config_from_file(config_source) + config.data_dir = TEST_TMP_DIR + yield 
Input4MipsDownloader(config=config) + delete_tmp_dir() -def test_downloader_init(simple_downloader_object): - assert isinstance(simple_downloader_object, Downloader) +@pytest.fixture +def cmip6_downloader_object(): + config_source = MINIMAL_DATASET_CONFIG_PATH + config = create_cmip6_downloader_config_from_file(config_source) + config.data_dir = TEST_TMP_DIR + yield CMIP6Downloader(config=config) + delete_tmp_dir() + +def test_downloader_init(input4mips_downloader_object, cmip6_downloader_object): + assert isinstance(input4mips_downloader_object, Input4MipsDownloader) + assert isinstance(cmip6_downloader_object, CMIP6Downloader) -def test_downloader_base_params(simple_downloader_object): - assert simple_downloader_object.model == "NorESM2-LM" - assert simple_downloader_object.experiments == ["historical", "ssp126"] +def test_downloader_base_params(input4mips_downloader_object, cmip6_downloader_object): + assert input4mips_downloader_object.config.project == INPUT4MIPS + assert input4mips_downloader_object.config.experiments == ["historical", "ssp126"] + assert cmip6_downloader_object.config.project == CMIP6 + assert cmip6_downloader_object.config.models == ["NorESM2-LM"] + assert cmip6_downloader_object.config.experiments == ["ssp126"] -def test_downloader_max_possible_member_number(simple_downloader_object): - assert simple_downloader_object.max_ensemble_members == 1 +def test_downloader_max_possible_member_number(cmip6_downloader_object): + assert cmip6_downloader_object.config.max_ensemble_members == MAX_ENSEMBLE_MEMBERS -def test_downloader_variables(simple_downloader_object): - assert simple_downloader_object.raw_vars == [ + +def test_downloader_variables(input4mips_downloader_object, cmip6_downloader_object): + assert cmip6_downloader_object.config.variables == ["tas"] + assert input4mips_downloader_object.config.variables == [ "CO2_em_anthro", "CO2_em_AIR_anthro", "CH4_em_openburning", "CH4_em_anthro", "CH4_em_AIR_anthro", ] - assert 
simple_downloader_object.biomass_vars == ["CO2", "CH4"] - assert simple_downloader_object.model_vars == ["tas"] - assert simple_downloader_object.meta_vars_percentage == [ + assert input4mips_downloader_object.config.biomass_vars == ["CO2", "CH4"] + assert input4mips_downloader_object.config.meta_vars_percentage == [ "CH4_percentage_AGRI", "CH4_percentage_BORF", "CH4_percentage_DEFO", @@ -105,39 +135,40 @@ def test_downloader_variables(simple_downloader_object): "CH4_percentage_SAVA", "CH4_percentage_TEMF", ] - assert simple_downloader_object.meta_vars_share == ["CH4_openburning_share"] + assert input4mips_downloader_object.config.meta_vars_share == ["CH4_openburning_share"] -def test_downloader_model_params(simple_downloader_object): - assert simple_downloader_object.model_node_link == "https://esgf-data.dkrz.de/esg-search" - assert simple_downloader_object.model_source_center == "NCC" +@pytest.mark.xfail +def test_downloader_model_params(cmip6_downloader_object): + # TODO refactor this test for new Node list + assert cmip6_downloader_object.config.node_link in "https://esgf-node.llnl.gov/esg-search/" -def test_download_raw_input(simple_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var): - simple_downloader_object.download_raw_input() +def test_download_raw_input(input4mips_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var): + input4mips_downloader_object.download() assert mock_raw_input_single_var.call_args_list == EXPECTED_MINIMAL_RAW_INPUT_CALLS - assert mock_raw_input_single_var.call_count == 8 + assert mock_raw_input_single_var.call_count == RAW_INPUT_NUM_OF_CALLS assert mock_meta_historic_single_var.call_args_list == EXPECTED_MINIMAL_META_HISTORIC_CALLS - assert mock_meta_historic_single_var.call_count == 6 + assert mock_meta_historic_single_var.call_count == META_HISTORIC_NUM_OF_CALLS -def test_download_from_model(simple_downloader_object, mock_model_single_var): - 
simple_downloader_object.download_from_model() +def test_download_from_model(cmip6_downloader_object, mock_model_single_var): + cmip6_downloader_object.download() assert mock_model_single_var.call_args_list == EXPECTED_MINIMAL_MODEL_CALLS - assert mock_model_single_var.call_count == 2 + assert mock_model_single_var.call_count == 1 def test_download_from_config_file( - simple_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var, mock_model_single_var + input4mips_downloader_object, mock_raw_input_single_var, mock_meta_historic_single_var, mock_model_single_var ): - download_from_config_file(config=MINIMAL_DATASET_CONFIG_PATH) + download_from_config_file(config_file=MINIMAL_DATASET_CONFIG_PATH) assert mock_raw_input_single_var.call_args_list == EXPECTED_MINIMAL_RAW_INPUT_CALLS - assert mock_raw_input_single_var.call_count == 8 + assert mock_raw_input_single_var.call_count == RAW_INPUT_NUM_OF_CALLS assert mock_meta_historic_single_var.call_args_list == EXPECTED_MINIMAL_META_HISTORIC_CALLS - assert mock_meta_historic_single_var.call_count == 6 + assert mock_meta_historic_single_var.call_count == META_HISTORIC_NUM_OF_CALLS assert mock_model_single_var.call_args_list == EXPECTED_MINIMAL_MODEL_CALLS - assert mock_model_single_var.call_count == 2 + assert mock_model_single_var.call_count == MODEL_SINGLE_NUM_OF_CALLS def _assert_content_is_in_wget_script(mock_call, string_content): @@ -145,34 +176,39 @@ def _assert_content_is_in_wget_script(mock_call, string_content): # With the provided inputs, there should be only 1 call. # We then access the call's arguments. 
We are interested in # the content of the wget script that is generated, and we - # want to make sure that for there inputs, we get the same files + # want to make sure that for the same inputs, we get the same files call_list = mock_call.call_args_list first_and_only_call = call_list[0] call_arguments = first_and_only_call.args[0] wget_script_content = call_arguments[2] + print(string_content) + print(wget_script_content) assert string_content in wget_script_content -def test_download_raw_input_single_var(simple_downloader_object, mock_subprocess_run): +def test_download_raw_input_single_var(input4mips_downloader_object, mock_subprocess_run): download_subprocess = mock_subprocess_run - simple_downloader_object.download_raw_input_single_var(variable="CO2_em_anthro", institution_id="PNNL-JGCRI") + input4mips_downloader_object.download_raw_input_single_var(variable="CO2_em_anthro", institution_id="PNNL-JGCRI") + # These are partial file strings. Since we download multiple variables at the same time, it's very complicated + # to specify versions for each without becoming cumbersome.
Therefore, this test just looks for the file parts + # that don't change once a new version gets published (which made this test crash and required updating this + # variable) expected_files = [ - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_175001-179912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_180001-184912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_185001-185012.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_185101-189912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_190001-194912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_195001-199912.nc", - "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-2017-05-18_gn_200001-201412.nc", + "CO2-em-anthro_input4MIPs_emissions_CMIP_CEDS-CMIP-", + "_gn_175001-179912.nc", + "_gn_180001-184912.nc", + "_gn_185001-189912.nc", + "_gn_190001-194912.nc", + "_gn_195001-199912.nc", ] download_subprocess.assert_called_once() for f in expected_files: _assert_content_is_in_wget_script(download_subprocess, f) -def test_download_meta_historic_biomassburning_single_var(simple_downloader_object, mock_subprocess_run): - simple_downloader_object.download_meta_historic_biomassburning_single_var( +def test_download_meta_historic_biomassburning_single_var(input4mips_downloader_object, mock_subprocess_run): + input4mips_downloader_object.download_meta_historic_biomassburning_single_var( variable="CH4_percentage_AGRI", institution_id="VUA" ) @@ -184,8 +220,8 @@ def test_download_meta_historic_biomassburning_single_var(simple_downloader_obje _assert_content_is_in_wget_script(mock_call=mock_subprocess_run, string_content=f) -def test_download_from_model_single_var(simple_downloader_object, mock_subprocess_run): - simple_downloader_object.download_from_model_single_var(variable="tas", experiment="ssp126") +def test_download_from_model_single_var(cmip6_downloader_object, mock_subprocess_run): + cmip6_downloader_object.download()
expected_files = [ "tas_Amon_NorESM2-LM_ssp126_r1i1p1f1_gn_201501-202012.nc", @@ -198,6 +234,6 @@ def test_download_from_model_single_var(simple_downloader_object, mock_subproces "tas_Amon_NorESM2-LM_ssp126_r1i1p1f1_gn_208101-209012.nc", "tas_Amon_NorESM2-LM_ssp126_r1i1p1f1_gn_209101-210012.nc", ] - mock_subprocess_run.assert_called_once() + mock_subprocess_run.assert_called() for f in expected_files: _assert_content_is_in_wget_script(mock_call=mock_subprocess_run, string_content=f)