Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ jobs:
cd CMEW
cylc validate -O metoffice .

- name: Run Cylc unit tests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am surprised that this is part of this issue

run: |
cd CMEW
conda run -n cmew cylc vip -O metoffice -O unittest .
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were also yesterday having some difficulties with -O unittest as opposed to -O test


- name: Run Cylc configuration linter
run: |
eval "$(conda shell.bash hook)"
Expand Down
12 changes: 12 additions & 0 deletions CMEW/app/configure_standardise/bin/create_request_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,16 @@ def create_request():
configparser.ConfigParser()
CDDS request configuration.
"""
extract_flag = os.environ.get("EXTRACT", "true").lower() == "true"
extract_data_path = os.environ.get("EXTRACT_DATA_PATH", "").strip()
end_year = int(os.environ["START_YEAR"]) + int(
os.environ["NUMBER_OF_YEARS"]
)
if not extract_flag and not extract_data_path:
raise ValueError(
"EXTRACT=False but EXTRACT_DATA_PATH is empty. "
"Provide a full path to previously extracted model output."
)
request = configparser.ConfigParser()
request["metadata"] = {
"base_date": "1850-01-01T00:00:00",
Expand Down Expand Up @@ -47,6 +54,8 @@ def create_request():
"root_data_dir": os.environ["ROOT_DATA_DIR"],
"workflow_basename": "CMEW",
}
if not extract_flag:
request["common"]["root_data_dir"] = extract_data_path
request["data"] = {
"end_date": f"{end_year}-01-01T00:00:00",
"mass_data_class": "crum",
Expand All @@ -65,6 +74,9 @@ def create_request():
"skip_archive": "True",
"cylc_args": "--no-detach -v",
}
if not extract_flag:
request["conversion"]["skip_extract"] = "True"

return request


Expand Down
60 changes: 58 additions & 2 deletions CMEW/app/configure_standardise/bin/test_create_request_file.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
# (C) Crown Copyright 2024-2025, Met Office.
# The LICENSE.md file contains full licensing details.

import os
import pytest

from create_request_file import create_request


def test_create_request(monkeypatch):
# In the order defined in 'create_request_file.py'.
def _set_base_env(monkeypatch):
"""
Set the base environment variables needed by create_request().
In the order defined in 'create_request_file.py'.
"""
monkeypatch.setenv("START_YEAR", "1993")
monkeypatch.setenv("NUMBER_OF_YEARS", "1")
monkeypatch.setenv("CALENDAR", "360_day")
Expand All @@ -18,10 +23,24 @@ def test_create_request(monkeypatch):
monkeypatch.setenv("VARIABLES_PATH", "/path/to/variables.txt")
monkeypatch.setenv("VARIANT_LABEL", "r1i1p1f1")


def _clear_extract_env(monkeypatch):
"""Ensure EXTRACT/EXTRACT_DATA_PATH do not leak from the suite env."""
monkeypatch.delenv("EXTRACT", raising=False)
monkeypatch.delenv("EXTRACT_DATA_PATH", raising=False)


def test_create_request_default_extract(monkeypatch):
"""EXTRACT default (True) - no skip_extract, root_data_dir unchanged."""
_clear_extract_env(monkeypatch)
_set_base_env(monkeypatch)
# Do not set EXTRACT or EXTRACT_DATA_PATH → EXTRACT defaults to True.

config = create_request()
actual = {
section: dict(config.items(section)) for section in config.sections()
}

expected = {
"metadata": {
"branch_method": "no parent",
Expand Down Expand Up @@ -66,6 +85,43 @@ def test_create_request(monkeypatch):
"mip_convert_plugin": "UKESM1",
"skip_archive": "True",
"cylc_args": "--no-detach -v",
# NOTE: no 'skip_extract' key when EXTRACT defaults to True
},
}

assert actual == expected


def test_create_request_extract_false_with_path(monkeypatch):
"""EXTRACT=False + EXTRACT_DATA_PATH set →
skip_extract + override root_data_dir."""
_clear_extract_env(monkeypatch)
_set_base_env(monkeypatch)
monkeypatch.setenv("EXTRACT", "false")
monkeypatch.setenv("EXTRACT_DATA_PATH", "/pre/extracted/data")

config = create_request()
actual = {
section: dict(config.items(section)) for section in config.sections()
}

# Ensure skip_extract is set in [conversion]
assert actual["conversion"]["skip_extract"] == "True"

# Ensure root_data_dir is taken from EXTRACT_DATA_PATH, not ROOT_DATA_DIR
assert actual["common"]["root_data_dir"] == "/pre/extracted/data"

# Optional extra checks to ensure other keys unaffected:
assert actual["common"]["root_proc_dir"] == "/path/to/proc/dir/"
assert actual["conversion"]["mip_convert_plugin"] == "UKESM1"


def test_create_request_extract_false_without_path_raises(monkeypatch):
"""EXTRACT=False with no EXTRACT_DATA_PATH → fail with ValueError."""
_clear_extract_env(monkeypatch)
_set_base_env(monkeypatch)
monkeypatch.setenv("EXTRACT", "false")
# EXTRACT_DATA_PATH intentionally not set (or could set to empty)

with pytest.raises(ValueError, match="EXTRACT=False"):
create_request()
16 changes: 15 additions & 1 deletion CMEW/flow.cylc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@
install_env_file => configure_recipe & configure_for<recipe>
configure_for<recipe> => configure_standardise<recipe>
configure_recipe => run_recipe<recipe>
configure_standardise<recipe> => standardise_model_data => run_recipe<recipe> & housekeeping
configure_standardise<recipe> => standardise_model_data
{%- if HOUSEKEEPING in (True, "true", "True") %}
standardise_model_data => run_recipe<recipe> & housekeeping
{%- else %}
standardise_model_data => run_recipe<recipe>
{%- endif %}
{%- if TEST %}
run_recipe<recipe> => compare<recipe>
{%- endif %}
Expand All @@ -35,6 +40,8 @@
platform = localhost
[[[environment]]]
ESMVALTOOL_MODULE_NAME = {{ ESMVALTOOL_MODULE_NAME }}
EXTRACT = {{ EXTRACT }}
EXTRACT_DATA_PATH = {{ EXTRACT_DATA_PATH }}
USER_CONFIG_DIR = ${CYLC_WORKFLOW_SHARE_DIR}/etc
USER_CONFIG_PATH = ${USER_CONFIG_DIR}/config-user.yml
OUTPUT_DIR = ${CYLC_WORKFLOW_SHARE_DIR}/cycle/${CYLC_TASK_CYCLE_POINT}
Expand All @@ -57,6 +64,13 @@
# Workaround for bug in CDDS: ROOT_SOFTWARE_DIR: unbound variable.
ROOT_SOFTWARE_DIR = ${CDDS_SOFTWARE_DIR}
CDDS_VERSION = {{ CDDS_VERSION }}
{%- if EXTRACT in (False, "false", "False") %}
# When EXTRACT is False, reuse pre-extracted data from EXTRACT_DATA_PATH.
# Align ROOT_DATA_DIR (used by restructure_for_cmip6) with the path
# that create_request_file.py writes into [common].root_data_dir.
ROOT_DATA_DIR = "{{ EXTRACT_DATA_PATH }}"
{%- endif %}


# Multi-run metadata family: reference and evaluation runs
[[MODEL_RUNS]]
Expand Down
24 changes: 24 additions & 0 deletions CMEW/meta/rose-meta.conf
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,30 @@ help=If required, this value must be set in a site-specific configuration file
=empty string.
type=quoted

[template variables=EXTRACT]
compulsory=true
description=To switch OFF or ON CDDS extract
help=The default is true, so data are extracted from MASS prior to a run.
sort-key=17
type=boolean

[template variables=EXTRACT_DATA_PATH]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned in yesterday's meeting, "RAW" data seems clearer than "EXTRACT[ED]". Less ambiguity about how far through the standardisation process it had got to (i.e. nowhere).

compulsory=true
description=The path to extracted data already available.
help=The default is "", so data to be extracted from MASS. If "", EXTRACT
=must be set to true, otherwise an Error will be raised.
sort-key=18
type=quoted

[template variables=HOUSEKEEPING]
compulsory=true
description=To switch OFF or ON the housekeeping task.
help=The default is true, so the locally staged CDDS workspace is removed.
=Make sure you set it to false if you want to keep the data and run
=with EXTRACT=true
sort-key=16
type=boolean

[template variables=LABEL_FOR_PLOTS]
compulsory=false
description=An 'alias' for the evaluation dataset in the ESMValTool recipe.
Expand Down
3 changes: 3 additions & 0 deletions CMEW/rose-suite.conf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ DRS_OBS="default"
DRS_OBS4MIPS="default"
DRS_OBS6="default"
ESMVALTOOL_MODULE_NAME=""
EXTRACT=true
EXTRACT_DATA_PATH=""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's me, but I think this is actually more like "EXTANT_DATA_PATH", or something about "previously extracted".
"[To] extract" sounds like a command.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think this is the case, see comments on rose-meta.conf.

HOUSEKEEPING=true
LABEL_FOR_PLOTS="UKESM1.0 N96ORCA1"
MAX_PARALLEL_TASKS=4
MODEL_ID="UKESM1-0-LL"
Expand Down