diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index 6989f722..26cdcb6a 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -42,6 +42,11 @@ jobs: cd CMEW cylc validate -O metoffice . + - name: Run Cylc unit tests + run: | + cd CMEW + conda run -n cmew cylc vip -O metoffice -O unittest . + - name: Run Cylc configuration linter run: | eval "$(conda shell.bash hook)" diff --git a/CMEW/app/configure_standardise/bin/create_request_file.py b/CMEW/app/configure_standardise/bin/create_request_file.py index f4f42820..db8a44ca 100755 --- a/CMEW/app/configure_standardise/bin/create_request_file.py +++ b/CMEW/app/configure_standardise/bin/create_request_file.py @@ -17,9 +17,16 @@ def create_request(): configparser.ConfigParser() CDDS request configuration. """ + extract_flag = os.environ.get("EXTRACT", "true").lower() == "true" + extract_data_path = os.environ.get("EXTRACT_DATA_PATH", "").strip() end_year = int(os.environ["START_YEAR"]) + int( os.environ["NUMBER_OF_YEARS"] ) + if not extract_flag and not extract_data_path: + raise ValueError( + "EXTRACT=False but EXTRACT_DATA_PATH is empty. " + "Provide a full path to previously extracted model output." + ) request = configparser.ConfigParser() request["metadata"] = { "base_date": "1850-01-01T00:00:00", @@ -47,6 +54,8 @@ def create_request(): "root_data_dir": os.environ["ROOT_DATA_DIR"], "workflow_basename": "CMEW", } + if not extract_flag: + request["common"]["root_data_dir"] = extract_data_path request["data"] = { "end_date": f"{end_year}-01-01T00:00:00", "mass_data_class": "crum", @@ -65,6 +74,9 @@ def create_request(): "skip_archive": "True", "cylc_args": "--no-detach -v", } + if not extract_flag: + request["conversion"]["skip_extract"] = "True" + return request diff --git a/CMEW/app/configure_standardise/bin/test_create_request_file.py b/CMEW/app/configure_standardise/bin/test_create_request_file.py index 0f57a306..1d049f99 100644 --- a/CMEW/app/configure_standardise/bin/test_create_request_file.py +++ b/CMEW/app/configure_standardise/bin/test_create_request_file.py @@ -1,12 +1,17 @@ # (C) Crown Copyright 2024-2025, Met Office. # The LICENSE.md file contains full licensing details. + import os +import pytest from create_request_file import create_request -def test_create_request(monkeypatch): - # In the order defined in 'create_request_file.py'. +def _set_base_env(monkeypatch): + """ + Set the base environment variables needed by create_request(). + In the order defined in 'create_request_file.py'. + """ monkeypatch.setenv("START_YEAR", "1993") monkeypatch.setenv("NUMBER_OF_YEARS", "1") monkeypatch.setenv("CALENDAR", "360_day") @@ -18,10 +23,24 @@ def test_create_request(monkeypatch): monkeypatch.setenv("VARIABLES_PATH", "/path/to/variables.txt") monkeypatch.setenv("VARIANT_LABEL", "r1i1p1f1") + +def _clear_extract_env(monkeypatch): + """Ensure EXTRACT/EXTRACT_DATA_PATH do not leak from the suite env.""" + monkeypatch.delenv("EXTRACT", raising=False) + monkeypatch.delenv("EXTRACT_DATA_PATH", raising=False) + + +def test_create_request_default_extract(monkeypatch): + """EXTRACT default (True) - no skip_extract, root_data_dir unchanged.""" + _clear_extract_env(monkeypatch) + _set_base_env(monkeypatch) + # Do not set EXTRACT or EXTRACT_DATA_PATH → EXTRACT defaults to True. + config = create_request() actual = { section: dict(config.items(section)) for section in config.sections() } + expected = { "metadata": { "branch_method": "no parent", @@ -66,6 +85,43 @@ def test_create_request(monkeypatch): "mip_convert_plugin": "UKESM1", "skip_archive": "True", "cylc_args": "--no-detach -v", + # NOTE: no 'skip_extract' key when EXTRACT defaults to True }, } + assert actual == expected + + +def test_create_request_extract_false_with_path(monkeypatch): + """EXTRACT=False + EXTRACT_DATA_PATH set → + skip_extract + override root_data_dir.""" + _clear_extract_env(monkeypatch) + _set_base_env(monkeypatch) + monkeypatch.setenv("EXTRACT", "false") + monkeypatch.setenv("EXTRACT_DATA_PATH", "/pre/extracted/data") + + config = create_request() + actual = { + section: dict(config.items(section)) for section in config.sections() + } + + # Ensure skip_extract is set in [conversion] + assert actual["conversion"]["skip_extract"] == "True" + + # Ensure root_data_dir is taken from EXTRACT_DATA_PATH, not ROOT_DATA_DIR + assert actual["common"]["root_data_dir"] == "/pre/extracted/data" + + # Optional extra checks to ensure other keys unaffected: + assert actual["common"]["root_proc_dir"] == "/path/to/proc/dir/" + assert actual["conversion"]["mip_convert_plugin"] == "UKESM1" + + +def test_create_request_extract_false_without_path_raises(monkeypatch): + """EXTRACT=False with no EXTRACT_DATA_PATH → fail with ValueError.""" + _clear_extract_env(monkeypatch) + _set_base_env(monkeypatch) + monkeypatch.setenv("EXTRACT", "false") + # EXTRACT_DATA_PATH intentionally not set (or could set to empty) + + with pytest.raises(ValueError, match="EXTRACT=False"): + create_request() diff --git a/CMEW/flow.cylc b/CMEW/flow.cylc index 1979ebd6..b614c2c7 100644 --- a/CMEW/flow.cylc +++ b/CMEW/flow.cylc @@ -21,7 +21,12 @@ install_env_file => configure_recipe & configure_for configure_for => configure_standardise configure_recipe => run_recipe - configure_standardise => standardise_model_data => run_recipe & housekeeping + configure_standardise => standardise_model_data + {%- if HOUSEKEEPING in (True, "true", "True") %} + standardise_model_data => run_recipe & housekeeping + {%- else %} + standardise_model_data => run_recipe + {%- endif %} {%- if TEST %} run_recipe => compare {%- endif %} @@ -35,6 +40,8 @@ platform = localhost [[[environment]]] ESMVALTOOL_MODULE_NAME = {{ ESMVALTOOL_MODULE_NAME }} + EXTRACT = {{ EXTRACT }} + EXTRACT_DATA_PATH = {{ EXTRACT_DATA_PATH }} USER_CONFIG_DIR = ${CYLC_WORKFLOW_SHARE_DIR}/etc USER_CONFIG_PATH = ${USER_CONFIG_DIR}/config-user.yml OUTPUT_DIR = ${CYLC_WORKFLOW_SHARE_DIR}/cycle/${CYLC_TASK_CYCLE_POINT} @@ -57,6 +64,13 @@ # Workaround for bug in CDDS: ROOT_SOFTWARE_DIR: unbound variable. ROOT_SOFTWARE_DIR = ${CDDS_SOFTWARE_DIR} CDDS_VERSION = {{ CDDS_VERSION }} + {%- if EXTRACT in (False, "false", "False") %} + # When EXTRACT is False, reuse pre-extracted data from EXTRACT_DATA_PATH. + # Align ROOT_DATA_DIR (used by restructure_for_cmip6) with the path + # that create_request_file.py writes into [common].root_data_dir. + ROOT_DATA_DIR = "{{ EXTRACT_DATA_PATH }}" + {%- endif %} + # Multi-run metadata family: reference and evaluation runs [[MODEL_RUNS]] diff --git a/CMEW/meta/rose-meta.conf b/CMEW/meta/rose-meta.conf index 788425ce..908bd1a7 100644 --- a/CMEW/meta/rose-meta.conf +++ b/CMEW/meta/rose-meta.conf @@ -104,6 +104,30 @@ help=If required, this value must be set in a site-specific configuration file =empty string. type=quoted +[template variables=EXTRACT] +compulsory=true +description=To switch OFF or ON CDDS extract +help=The default is true, so data are extracted from MASS prior to a run. +sort-key=17 +type=boolean + +[template variables=EXTRACT_DATA_PATH] +compulsory=true +description=The path to extracted data already available. +help=The default is "", so data to be extracted from MASS. If "", EXTRACT + =must be set to true, otherwise an Error will be raised. +sort-key=18 +type=quoted + +[template variables=HOUSEKEEPING] +compulsory=true +description=To switch OFF or ON the housekeeping task. +help=The default is true, so the locally staged CDDS workspace is removed. + =Make sure you set it to false if you want to keep the data and run + =with EXTRACT=true +sort-key=16 +type=boolean + [template variables=LABEL_FOR_PLOTS] compulsory=false description=An 'alias' for the evaluation dataset in the ESMValTool recipe. diff --git a/CMEW/rose-suite.conf b/CMEW/rose-suite.conf index a0a885d2..2e65f8bd 100644 --- a/CMEW/rose-suite.conf +++ b/CMEW/rose-suite.conf @@ -14,6 +14,9 @@ DRS_OBS="default" DRS_OBS4MIPS="default" DRS_OBS6="default" ESMVALTOOL_MODULE_NAME="" +EXTRACT=true +EXTRACT_DATA_PATH="" +HOUSEKEEPING=true LABEL_FOR_PLOTS="UKESM1.0 N96ORCA1" MAX_PARALLEL_TASKS=4 MODEL_ID="UKESM1-0-LL"