diff --git a/chap_core/cli.py b/chap_core/cli.py index e605facf1..25cac75b0 100644 --- a/chap_core/cli.py +++ b/chap_core/cli.py @@ -10,7 +10,7 @@ from cyclopts import App # noqa: E402 -from chap_core.cli_endpoints import evaluate, forecast, init, preference_learn, utils, validate # noqa: E402 +from chap_core.cli_endpoints import convert, evaluate, forecast, init, preference_learn, utils, validate # noqa: E402 logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -18,6 +18,7 @@ app = App() # Register commands from each module +convert.register_commands(app) evaluate.register_commands(app) forecast.register_commands(app) init.register_commands(app) diff --git a/chap_core/cli_endpoints/_common.py b/chap_core/cli_endpoints/_common.py index 605f13540..f0d5f9542 100644 --- a/chap_core/cli_endpoints/_common.py +++ b/chap_core/cli_endpoints/_common.py @@ -8,7 +8,6 @@ import yaml from chap_core.database.model_templates_and_config_tables import ModelConfiguration -from chap_core.datatypes import FullData from chap_core.file_io.example_data_set import datasets from chap_core.geometry import Polygons from chap_core.models.model_template import ModelTemplate @@ -132,10 +131,10 @@ def load_dataset_from_csv( df = pd.read_csv(csv_path) # Rename columns: mapping is {target_name: source_name}, so swap for rename df.rename(columns={v: k for k, v in column_mapping.items()}, inplace=True) - dataset = DataSet.from_pandas(df, FullData) + dataset = DataSet.from_pandas(df) dataset.metadata = DataSetMetaData(name=str(Path(csv_path).stem), filename=str(csv_path)) else: - dataset = DataSet.from_csv(csv_path, FullData) + dataset = DataSet.from_csv(csv_path) if geojson_path is not None: logging.info(f"Loading polygons from {geojson_path}") @@ -156,7 +155,7 @@ def load_dataset( if dataset_name is None: assert dataset_csv is not None, "Must specify a dataset name or a dataset csv file" logging.info(f"Loading dataset from {dataset_csv}") - dataset = DataSet.from_csv(dataset_csv, FullData) + dataset = DataSet.from_csv(dataset_csv) if polygons_json is not None: logging.info(f"Loading polygons from {polygons_json}") polygons = Polygons.from_file(polygons_json, id_property=polygons_id_field) diff --git a/chap_core/cli_endpoints/convert.py b/chap_core/cli_endpoints/convert.py new file mode 100644 index 000000000..6a186d509 --- /dev/null +++ b/chap_core/cli_endpoints/convert.py @@ -0,0 +1,62 @@ +"""Convert commands for CHAP CLI.""" + +import json +import logging +from pathlib import Path +from typing import Annotated + +import pandas as pd +from cyclopts import Parameter + +logger = logging.getLogger(__name__) + + +def convert_request( + request_json: Annotated[ + Path, + Parameter(help="Path to a create-backtest-with-data JSON request file"), + ], + output_prefix: Annotated[ + Path, + Parameter(help="Prefix for output files (creates PREFIX.csv and PREFIX.geojson)"), + ], +): + """Convert a create-backtest-with-data JSON request to CSV and GeoJSON files. + + Takes a JSON payload from the DHIS2/Modeling App and produces: + 1. A CHAP-compatible CSV file with time_period, location, and feature columns + 2. 
A GeoJSON file with region boundaries + + Examples: + chap convert-request ./request.json ./output + """ + with open(request_json) as f: + data = json.load(f) + + provided_data = data["providedData"] + df = pd.DataFrame(provided_data) + + df = df.rename(columns={"orgUnit": "location", "period": "time_period"}) + + pivoted = df.pivot_table( + index=["location", "time_period"], + columns="featureName", + values="value", + aggfunc="first", + ).reset_index() + + pivoted.columns.name = None + + csv_path = Path(f"{output_prefix}.csv") + pivoted.to_csv(csv_path, index=False) + print(f"Created: {csv_path}") + + geojson_path = Path(f"{output_prefix}.geojson") + with open(geojson_path, "w") as f: + json.dump(data["geojson"], f, indent=2) + print(f"Created: {geojson_path}") + + +def register_commands(app): + """Register convert commands with the CLI app.""" + app.command(name="convert-request")(convert_request) diff --git a/docs/kigali-workshop/kigali-workshop-material/11_feb_presession.md b/docs/kigali-workshop/kigali-workshop-material/11_feb_presession.md index cb1e95a96..0a97f6392 100644 --- a/docs/kigali-workshop/kigali-workshop-material/11_feb_presession.md +++ b/docs/kigali-workshop/kigali-workshop-material/11_feb_presession.md @@ -38,6 +38,29 @@ If you have CHAP connected to a DHIS2 instance via the Modeling App, you can cre The downloaded CSV will already be in CHAP-compatible format. +## Converting a Modeling App request to CSV and GeoJSON + +If you have a JSON request payload from the DHIS2 Modeling App (the `create-backtest-with-data` format), you can convert it directly to a CHAP-compatible CSV and GeoJSON file pair using `chap convert-request`: + +```bash +chap convert-request example_data/create-backtest-with-data.json /tmp/chap_convert_doctest +``` + +This reads the JSON file and produces two files: + +- `/tmp/chap_convert_doctest.csv` -- a pivoted CSV with `time_period`, `location`, and feature columns +- `/tmp/chap_convert_doctest.geojson` -- the region boundaries extracted from the request + +You can then validate the result: + +```bash +chap validate --dataset-csv /tmp/chap_convert_doctest.csv +``` + +```bash +rm -f /tmp/chap_convert_doctest.csv /tmp/chap_convert_doctest.geojson +``` + ## Transforming data from other sources If your data comes from a source other than DHIS2, you need to make sure it matches the CHAP format. @@ -99,8 +122,8 @@ Use the `chap validate` command to check that your CSV is CHAP-compatible before ### Basic validation -```console -chap validate --dataset-csv my_data.csv +```bash +chap validate --dataset-csv example_data/laos_subset.csv ``` This checks for: @@ -113,10 +136,10 @@ This checks for: You can also validate that your dataset has the covariates a specific model requires: -```console +```bash chap validate \ - --dataset-csv my_data.csv \ - --model-name https://github.com/dhis2-chap/minimalist_example_r + --dataset-csv example_data/laos_subset.csv \ + --model-name external_models/naive_python_model_uv ``` This additionally checks that all required covariates for the model are present in the dataset, and that the time period type (weekly/monthly) matches what the model supports. 
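For reference, the payload that `chap convert-request` consumes has the shape exercised by `tests/test_convert_request.py` further down in this diff. A minimal sketch, assuming only the field names the converter actually reads (`providedData`, `featureName`, `orgUnit`, `period`, `value`, `geojson`); the location IDs and values are illustrative:

```python
import json
from pathlib import Path

from chap_core.cli_endpoints.convert import convert_request

# One record per (feature, org unit, period); the converter pivots these
# into one CSV column per featureName.
request = {
    "providedData": [
        {"featureName": "disease_cases", "orgUnit": "loc_1", "period": "2022-01", "value": 10},
        {"featureName": "rainfall", "orgUnit": "loc_1", "period": "2022-01", "value": 1.5},
    ],
    "geojson": {
        "type": "FeatureCollection",
        "features": [
            {
                "id": "loc_1",
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [0, 0]},
                "properties": {},
            }
        ],
    },
}

request_path = Path("/tmp/request.json")
request_path.write_text(json.dumps(request))

# Writes /tmp/demo.csv and /tmp/demo.geojson, same as the CLI invocation.
convert_request(request_path, Path("/tmp/demo"))
```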
@@ -125,25 +148,18 @@ This additionally checks that all required covariates for the model are present If your CSV uses different column names than what the model expects, provide a mapping file: -```console +```bash chap validate \ - --dataset-csv my_data.csv \ - --model-name https://github.com/dhis2-chap/minimalist_example_r \ - --data-source-mapping mapping.json + --dataset-csv example_data/laos_subset_custom_columns.csv \ + --data-source-mapping example_data/column_mapping.json ``` -Where `mapping.json` maps model covariate names to your CSV column names: +Where `column_mapping.json` maps model covariate names to your CSV column names: ```json {"rainfall": "rain_mm", "mean_temperature": "temp_avg"} ``` -### Example: validating the bundled dataset - -```bash -chap validate --dataset-csv example_data/laos_subset.csv -``` - ## Running an evaluation Once your dataset is validated, you can evaluate a model on it using `chap eval`: diff --git a/example_data/column_mapping.json b/example_data/column_mapping.json new file mode 100644 index 000000000..70d9b4599 --- /dev/null +++ b/example_data/column_mapping.json @@ -0,0 +1 @@ +{"rainfall": "rain_mm", "mean_temperature": "temp_avg"} diff --git a/example_data/laos_subset_custom_columns.csv b/example_data/laos_subset_custom_columns.csv new file mode 100644 index 000000000..855720040 --- /dev/null +++ b/example_data/laos_subset_custom_columns.csv @@ -0,0 +1,109 @@ +time_period,rain_mm,temp_avg,disease_cases,population,location +2010-01,37.965,20.04,1.0,75049.5636160714,Bokeo +2010-02,8.527,22.22,1.0,75049.5636160714,Bokeo +2010-03,23.591,24.59,2.0,75049.5636160714,Bokeo +2010-04,101.176,27.52,0.0,75049.5636160714,Bokeo +2010-05,106.366,27.23,1.0,75049.5636160714,Bokeo +2010-06,235.044,26.01,5.0,75049.5636160714,Bokeo +2010-07,512.741,24.6,9.0,75049.5636160714,Bokeo +2010-08,483.291,23.8,8.0,75049.5636160714,Bokeo +2010-09,282.196,23.74,27.0,75049.5636160714,Bokeo +2010-10,145.437,22.68,22.0,75049.5636160714,Bokeo +2010-11,6.601,20.44,18.0,75049.5636160714,Bokeo +2010-12,45.062,19.31,4.0,75049.5636160714,Bokeo +2011-01,29.189,18.16,1.0,76704.2431640625,Bokeo +2011-02,7.948,21.24,1.0,76704.2431640625,Bokeo +2011-03,95.081,21.08,1.0,76704.2431640625,Bokeo +2011-04,165.476,23.52,1.0,76704.2431640625,Bokeo +2011-05,309.719,23.51,3.0,76704.2431640625,Bokeo +2011-06,280.401,23.93,4.0,76704.2431640625,Bokeo +2011-07,395.965,24.14,6.0,76704.2431640625,Bokeo +2011-08,394.6,23.5,10.0,76704.2431640625,Bokeo +2011-09,346.165,23.28,8.0,76704.2431640625,Bokeo +2011-10,65.909,22.71,10.0,76704.2431640625,Bokeo +2011-11,11.921,20.2,6.0,76704.2431640625,Bokeo +2011-12,5.966,18.21,3.0,76704.2431640625,Bokeo +2012-01,48.604,18.88,2.0,78358.9227120536,Bokeo +2012-02,10.379,22.47,2.0,78358.9227120536,Bokeo +2012-03,45.608,24.01,1.0,78358.9227120536,Bokeo +2012-04,75.97,25.76,2.0,78358.9227120536,Bokeo +2012-05,372.717,24.78,5.0,78358.9227120536,Bokeo +2012-06,234.729,24.37,10.0,78358.9227120536,Bokeo +2012-07,434.478,23.16,17.0,78358.9227120536,Bokeo +2012-08,362.176,23.7,17.0,78358.9227120536,Bokeo +2012-09,242.424,23.69,26.0,78358.9227120536,Bokeo +2012-10,107.072,22.96,28.0,78358.9227120536,Bokeo +2012-11,119.215,22.14,18.0,78358.9227120536,Bokeo +2012-12,27.869,19.29,8.0,78358.9227120536,Bokeo +2010-01,99.579,22.4,110.0,558527.426339286,Vientiane[prefecture] +2010-02,4.529,25.48,93.0,558527.426339286,Vientiane[prefecture] +2010-03,4.093,27.4,68.0,558527.426339286,Vientiane[prefecture] +2010-04,66.746,29.71,87.0,558527.426339286,Vientiane[prefecture] 
+2010-05,252.105,28.47,116.0,558527.426339286,Vientiane[prefecture] +2010-06,316.23,27.41,193.0,558527.426339286,Vientiane[prefecture] +2010-07,455.367,26.57,852.0,558527.426339286,Vientiane[prefecture] +2010-08,580.987,25.58,1291.0,558527.426339286,Vientiane[prefecture] +2010-09,353.751,25.89,1117.0,558527.426339286,Vientiane[prefecture] +2010-10,157.159,24.69,606.0,558527.426339286,Vientiane[prefecture] +2010-11,1.463,23.48,212.0,558527.426339286,Vientiane[prefecture] +2010-12,12.176,22.25,36.0,558527.426339286,Vientiane[prefecture] +2011-01,2.258,19.5,23.0,568977.998046875,Vientiane[prefecture] +2011-02,5.528,23.89,18.0,568977.998046875,Vientiane[prefecture] +2011-03,105.513,22.54,26.0,568977.998046875,Vientiane[prefecture] +2011-04,69.588,26.93,31.0,568977.998046875,Vientiane[prefecture] +2011-05,248.2,26.33,67.0,568977.998046875,Vientiane[prefecture] +2011-06,412.335,26.27,89.0,568977.998046875,Vientiane[prefecture] +2011-07,540.268,26.08,115.0,568977.998046875,Vientiane[prefecture] +2011-08,354.639,25.67,197.0,568977.998046875,Vientiane[prefecture] +2011-09,486.031,25.25,163.0,568977.998046875,Vientiane[prefecture] +2011-10,152.014,24.64,205.0,568977.998046875,Vientiane[prefecture] +2011-11,13.382,24.24,120.0,568977.998046875,Vientiane[prefecture] +2011-12,1.713,20.33,55.0,568977.998046875,Vientiane[prefecture] +2012-01,18.871,21.82,51.0,579428.569754464,Vientiane[prefecture] +2012-02,11.774,24.68,36.0,579428.569754464,Vientiane[prefecture] +2012-03,28.807,26.42,29.0,579428.569754464,Vientiane[prefecture] +2012-04,129.881,27.8,46.0,579428.569754464,Vientiane[prefecture] +2012-05,377.271,26.67,94.0,579428.569754464,Vientiane[prefecture] +2012-06,263.954,26.6,209.0,579428.569754464,Vientiane[prefecture] +2012-07,394.475,25.74,351.0,579428.569754464,Vientiane[prefecture] +2012-08,426.776,25.73,359.0,579428.569754464,Vientiane[prefecture] +2012-09,232.189,26.08,542.0,579428.569754464,Vientiane[prefecture] +2012-10,58.911,26.4,573.0,579428.569754464,Vientiane[prefecture] +2012-11,36.796,26.24,369.0,579428.569754464,Vientiane[prefecture] +2012-12,6.881,23.19,169.0,579428.569754464,Vientiane[prefecture] +2010-01,18.568,22.22,1.0,685766.336495536,Savannakhet +2010-02,24.886,25.09,2.0,685766.336495536,Savannakhet +2010-03,7.228,26.05,19.0,685766.336495536,Savannakhet +2010-04,54.372,28.2,51.0,685766.336495536,Savannakhet +2010-05,186.318,27.86,37.0,685766.336495536,Savannakhet +2010-06,213.602,26.97,293.0,685766.336495536,Savannakhet +2010-07,237.73,26.5,803.0,685766.336495536,Savannakhet +2010-08,452.702,24.98,566.0,685766.336495536,Savannakhet +2010-09,219.88,25.38,569.0,685766.336495536,Savannakhet +2010-10,294.59,23.57,158.0,685766.336495536,Savannakhet +2010-11,62.321,22.16,24.0,685766.336495536,Savannakhet +2010-12,9.036,21.39,0.0,685766.336495536,Savannakhet +2011-01,8.166,18.19,14.0,694482.169433594,Savannakhet +2011-02,7.73,22.24,11.0,694482.169433594,Savannakhet +2011-03,30.255,21.13,16.0,694482.169433594,Savannakhet +2011-04,80.605,25.43,20.0,694482.169433594,Savannakhet +2011-05,184.776,26.09,43.0,694482.169433594,Savannakhet +2011-06,264.151,25.75,57.0,694482.169433594,Savannakhet +2011-07,279.663,25.33,74.0,694482.169433594,Savannakhet +2011-08,345.62,24.95,127.0,694482.169433594,Savannakhet +2011-09,407.913,23.96,105.0,694482.169433594,Savannakhet +2011-10,248.981,23.29,132.0,694482.169433594,Savannakhet +2011-11,60.231,23.13,77.0,694482.169433594,Savannakhet +2011-12,12.468,18.97,36.0,694482.169433594,Savannakhet +2012-01,33.55,20.43,33.0,703198.002371652,Savannakhet 
+2012-02,6.125,22.61,23.0,703198.002371652,Savannakhet +2012-03,50.848,24.59,19.0,703198.002371652,Savannakhet +2012-04,105.211,26.51,30.0,703198.002371652,Savannakhet +2012-05,233.339,26.52,60.0,703198.002371652,Savannakhet +2012-06,268.42,25.78,134.0,703198.002371652,Savannakhet +2012-07,286.584,25.25,225.0,703198.002371652,Savannakhet +2012-08,248.395,25.08,230.0,703198.002371652,Savannakhet +2012-09,205.193,25.07,348.0,703198.002371652,Savannakhet +2012-10,46.7,25.13,367.0,703198.002371652,Savannakhet +2012-11,22.827,25.5,237.0,703198.002371652,Savannakhet +2012-12,7.775,23.08,108.0,703198.002371652,Savannakhet diff --git a/tests/test_convert_request.py b/tests/test_convert_request.py new file mode 100644 index 000000000..2c463b86a --- /dev/null +++ b/tests/test_convert_request.py @@ -0,0 +1,82 @@ +import json + +import pandas as pd + +from chap_core.cli_endpoints.convert import convert_request + + +def test_convert_request_from_example_json(tmp_path): + """Test convert_request using the example JSON fixture.""" + input_path = "example_data/create-backtest-with-data.json" + output_prefix = tmp_path / "output" + + convert_request(input_path, output_prefix) + + csv_path = tmp_path / "output.csv" + geojson_path = tmp_path / "output.geojson" + + assert csv_path.exists() + assert geojson_path.exists() + + df = pd.read_csv(csv_path) + assert "location" in df.columns + assert "time_period" in df.columns + assert "disease_cases" in df.columns + assert "rainfall" in df.columns + assert "mean_temperature" in df.columns + assert len(df) > 0 + + with open(geojson_path) as f: + geojson = json.load(f) + assert geojson["type"] == "FeatureCollection" + assert len(geojson["features"]) > 0 + + +def test_convert_request_pivots_correctly(tmp_path): + """Test that provided data is correctly pivoted into columns.""" + request = { + "providedData": [ + {"featureName": "rainfall", "orgUnit": "loc_1", "period": "2022-01", "value": 1.5}, + {"featureName": "rainfall", "orgUnit": "loc_1", "period": "2022-02", "value": 2.0}, + {"featureName": "disease_cases", "orgUnit": "loc_1", "period": "2022-01", "value": 10}, + {"featureName": "disease_cases", "orgUnit": "loc_1", "period": "2022-02", "value": 15}, + {"featureName": "rainfall", "orgUnit": "loc_2", "period": "2022-01", "value": 3.0}, + {"featureName": "disease_cases", "orgUnit": "loc_2", "period": "2022-01", "value": 20}, + ], + "geojson": { + "type": "FeatureCollection", + "features": [ + { + "id": "loc_1", + "type": "Feature", + "geometry": {"type": "Point", "coordinates": [0, 0]}, + "properties": {}, + }, + { + "id": "loc_2", + "type": "Feature", + "geometry": {"type": "Point", "coordinates": [1, 1]}, + "properties": {}, + }, + ], + }, + } + + input_path = tmp_path / "request.json" + with open(input_path, "w") as f: + json.dump(request, f) + + output_prefix = tmp_path / "result" + convert_request(input_path, output_prefix) + + df = pd.read_csv(tmp_path / "result.csv") + assert set(df.columns) == {"location", "time_period", "disease_cases", "rainfall"} + assert len(df) == 3 # 2 periods for loc_1 + 1 period for loc_2 + + row = df[(df["location"] == "loc_1") & (df["time_period"] == "2022-01")].iloc[0] + assert row["rainfall"] == 1.5 + assert row["disease_cases"] == 10 + + with open(tmp_path / "result.geojson") as f: + geojson = json.load(f) + assert len(geojson["features"]) == 2 diff --git a/tests/test_documentation.py b/tests/test_documentation.py index 4102e0392..fbc0a2e0c 100644 --- a/tests/test_documentation.py +++ b/tests/test_documentation.py @@ -32,6 
+32,7 @@ "docs/feature_tutorials/extended_predictor.md", "docs/chap-cli/evaluation-workflow.md", # Workshop tutorials (instructional content, not testable code) + # Note: 11_feb_presession.md is tested in test_documentation_slow.py "docs/kigali-workshop", ] diff --git a/tests/test_documentation_slow.py b/tests/test_documentation_slow.py index b084c5ea0..d17124bdd 100644 --- a/tests/test_documentation_slow.py +++ b/tests/test_documentation_slow.py @@ -126,9 +126,18 @@ class TestSlowDocumentationBash: SLOW_DOC_FILES = [ "docs/feature_tutorials/extended_predictor.md", "docs/chap-cli/evaluation-workflow.md", + "docs/kigali-workshop/kigali-workshop-material/11_feb_presession.md", ] @pytest.mark.parametrize("fpath", SLOW_DOC_FILES) def test_slow_docs_bash(self, fpath): """Test bash code blocks in slow documentation files.""" check_md_file(fpath=fpath, lang="bash") + + @pytest.mark.parametrize( + "fpath", + ["docs/kigali-workshop/kigali-workshop-material/11_feb_presession.md"], + ) + def test_slow_docs_python(self, fpath): + """Test Python code blocks in slow documentation files.""" + check_md_file(fpath=fpath, lang="python", memory=True)
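The pivot step inside `convert_request`, in isolation: long-format `providedData` records become one column per `featureName`, keyed by `(location, time_period)`. A sketch of just that transformation, with the same `aggfunc="first"` choice the converter makes (duplicate records for the same key keep the first value):

```python
import pandas as pd

records = [
    {"featureName": "rainfall", "orgUnit": "loc_1", "period": "2022-01", "value": 1.5},
    {"featureName": "disease_cases", "orgUnit": "loc_1", "period": "2022-01", "value": 10},
]

df = pd.DataFrame(records).rename(columns={"orgUnit": "location", "period": "time_period"})

pivoted = df.pivot_table(
    index=["location", "time_period"],
    columns="featureName",
    values="value",
    aggfunc="first",  # resolve any duplicate (location, period, feature) triples
).reset_index()
pivoted.columns.name = None  # drop the "featureName" axis label pivot_table leaves behind

# Resulting columns: location, time_period, disease_cases, rainfall
print(pivoted)
```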
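Likewise, the column-mapping convention used by `--data-source-mapping`: `load_dataset_from_csv` in `chap_core/cli_endpoints/_common.py` receives the mapping as `{target_name: source_name}` and inverts it before calling `DataFrame.rename`. A minimal sketch using one row of `laos_subset_custom_columns.csv` together with the bundled `column_mapping.json`:

```python
import pandas as pd

# Mapping file contents: CHAP covariate name -> CSV column name.
column_mapping = {"rainfall": "rain_mm", "mean_temperature": "temp_avg"}

df = pd.DataFrame(
    {
        "time_period": ["2010-01"],
        "rain_mm": [37.965],
        "temp_avg": [20.04],
        "disease_cases": [1.0],
        "population": [75049.5636160714],
        "location": ["Bokeo"],
    }
)

# Swap keys and values so pandas renames source column -> target column.
df = df.rename(columns={v: k for k, v in column_mapping.items()})
assert {"rainfall", "mean_temperature"} <= set(df.columns)
```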