Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion chap_core/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@

from cyclopts import App # noqa: E402

from chap_core.cli_endpoints import evaluate, forecast, init, preference_learn, utils, validate # noqa: E402
from chap_core.cli_endpoints import convert, evaluate, forecast, init, preference_learn, utils, validate # noqa: E402

logger = logging.getLogger()
logger.setLevel(logging.INFO)

app = App()

# Register commands from each module
convert.register_commands(app)
evaluate.register_commands(app)
forecast.register_commands(app)
init.register_commands(app)
Expand Down
7 changes: 3 additions & 4 deletions chap_core/cli_endpoints/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import yaml

from chap_core.database.model_templates_and_config_tables import ModelConfiguration
from chap_core.datatypes import FullData
from chap_core.file_io.example_data_set import datasets
from chap_core.geometry import Polygons
from chap_core.models.model_template import ModelTemplate
Expand Down Expand Up @@ -132,10 +131,10 @@ def load_dataset_from_csv(
df = pd.read_csv(csv_path)
# Rename columns: mapping is {target_name: source_name}, so swap for rename
df.rename(columns={v: k for k, v in column_mapping.items()}, inplace=True)
dataset = DataSet.from_pandas(df, FullData)
dataset = DataSet.from_pandas(df)
dataset.metadata = DataSetMetaData(name=str(Path(csv_path).stem), filename=str(csv_path))
else:
dataset = DataSet.from_csv(csv_path, FullData)
dataset = DataSet.from_csv(csv_path)

if geojson_path is not None:
logging.info(f"Loading polygons from {geojson_path}")
Expand All @@ -156,7 +155,7 @@ def load_dataset(
if dataset_name is None:
assert dataset_csv is not None, "Must specify a dataset name or a dataset csv file"
logging.info(f"Loading dataset from {dataset_csv}")
dataset = DataSet.from_csv(dataset_csv, FullData)
dataset = DataSet.from_csv(dataset_csv)
if polygons_json is not None:
logging.info(f"Loading polygons from {polygons_json}")
polygons = Polygons.from_file(polygons_json, id_property=polygons_id_field)
Expand Down
62 changes: 62 additions & 0 deletions chap_core/cli_endpoints/convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Convert commands for CHAP CLI."""

import json
import logging
from pathlib import Path
from typing import Annotated

import pandas as pd
from cyclopts import Parameter

logger = logging.getLogger(__name__)


def convert_request(
    request_json: Annotated[
        Path,
        Parameter(help="Path to a create-backtest-with-data JSON request file"),
    ],
    output_prefix: Annotated[
        Path,
        Parameter(help="Prefix for output files (creates PREFIX.csv and PREFIX.geojson)"),
    ],
):
    """Convert a create-backtest-with-data JSON request to CSV and GeoJSON files.

    Takes a JSON payload from the DHIS2/Modeling App and produces:
    1. A CHAP-compatible CSV file with time_period, location, and feature columns
    2. A GeoJSON file with region boundaries

    Args:
        request_json: Path to the JSON request. Must contain top-level
            "providedData" (list of {featureName, orgUnit, period, value}
            records) and "geojson" keys; a KeyError is raised otherwise.
        output_prefix: Output path prefix; "<prefix>.csv" and
            "<prefix>.geojson" are written.

    Examples:
        chap convert-request ./request.json ./output
    """
    # Read explicitly as UTF-8 so non-ASCII region names survive on platforms
    # whose default locale encoding is not UTF-8 (e.g. Windows).
    with open(request_json, encoding="utf-8") as f:
        data = json.load(f)

    # Long format: one record per (featureName, orgUnit, period) triple.
    provided_data = data["providedData"]
    df = pd.DataFrame(provided_data)

    # CHAP expects "location" and "time_period" as column names.
    df = df.rename(columns={"orgUnit": "location", "period": "time_period"})

    # Pivot to wide format: one row per (location, time_period), one column
    # per feature; "first" keeps the first value if duplicate records exist.
    pivoted = df.pivot_table(
        index=["location", "time_period"],
        columns="featureName",
        values="value",
        aggfunc="first",
    ).reset_index()

    # Drop the "featureName" axis label pandas leaves on the columns index.
    pivoted.columns.name = None

    csv_path = Path(f"{output_prefix}.csv")
    pivoted.to_csv(csv_path, index=False)
    print(f"Created: {csv_path}")

    geojson_path = Path(f"{output_prefix}.geojson")
    # Write UTF-8 explicitly for the same portability reason as the read above.
    with open(geojson_path, "w", encoding="utf-8") as f:
        json.dump(data["geojson"], f, indent=2)
    print(f"Created: {geojson_path}")


def register_commands(app):
    """Register convert commands with the CLI app."""
    # app.command(name=...) returns a decorator; apply it explicitly.
    register = app.command(name="convert-request")
    register(convert_request)
48 changes: 32 additions & 16 deletions docs/kigali-workshop/kigali-workshop-material/11_feb_presession.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,29 @@ If you have CHAP connected to a DHIS2 instance via the Modeling App, you can cre

The downloaded CSV will already be in CHAP-compatible format.

## Converting a Modeling App request to CSV and GeoJSON

If you have a JSON request payload from the DHIS2 Modeling App (the `create-backtest-with-data` format), you can convert it directly to a CHAP-compatible CSV and GeoJSON file pair using `chap convert-request`:

```bash
chap convert-request example_data/create-backtest-with-data.json /tmp/chap_convert_doctest
```

This reads the JSON file and produces two files:

- `/tmp/chap_convert_doctest.csv` -- a pivoted CSV with `time_period`, `location`, and feature columns
- `/tmp/chap_convert_doctest.geojson` -- the region boundaries extracted from the request

You can then validate the result:

```bash
chap validate --dataset-csv /tmp/chap_convert_doctest.csv
```

```bash
rm -f /tmp/chap_convert_doctest.csv /tmp/chap_convert_doctest.geojson
```

## Transforming data from other sources

If your data comes from a source other than DHIS2, you need to make sure it matches the CHAP format.
Expand Down Expand Up @@ -99,8 +122,8 @@ Use the `chap validate` command to check that your CSV is CHAP-compatible before

### Basic validation

```console
chap validate --dataset-csv my_data.csv
```bash
chap validate --dataset-csv example_data/laos_subset.csv
```

This checks for:
Expand All @@ -113,10 +136,10 @@ This checks for:

You can also validate that your dataset has the covariates a specific model requires:

```console
```bash
chap validate \
--dataset-csv my_data.csv \
--model-name https://github.com/dhis2-chap/minimalist_example_r
--dataset-csv example_data/laos_subset.csv \
--model-name external_models/naive_python_model_uv
```

This additionally checks that all required covariates for the model are present in the dataset, and that the time period type (weekly/monthly) matches what the model supports.
Expand All @@ -125,25 +148,18 @@ This additionally checks that all required covariates for the model are present

If your CSV uses different column names than what the model expects, provide a mapping file:

```console
```bash
chap validate \
--dataset-csv my_data.csv \
--model-name https://github.com/dhis2-chap/minimalist_example_r \
--data-source-mapping mapping.json
--dataset-csv example_data/laos_subset_custom_columns.csv \
--data-source-mapping example_data/column_mapping.json
```

Where `mapping.json` maps model covariate names to your CSV column names:
Where `column_mapping.json` maps model covariate names to your CSV column names:

```json
{"rainfall": "rain_mm", "mean_temperature": "temp_avg"}
```

### Example: validating the bundled dataset

```bash
chap validate --dataset-csv example_data/laos_subset.csv
```

## Running an evaluation

Once your dataset is validated, you can evaluate a model on it using `chap eval`:
Expand Down
1 change: 1 addition & 0 deletions example_data/column_mapping.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"rainfall": "rain_mm", "mean_temperature": "temp_avg"}
109 changes: 109 additions & 0 deletions example_data/laos_subset_custom_columns.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
time_period,rain_mm,temp_avg,disease_cases,population,location
2010-01,37.965,20.04,1.0,75049.5636160714,Bokeo
2010-02,8.527,22.22,1.0,75049.5636160714,Bokeo
2010-03,23.591,24.59,2.0,75049.5636160714,Bokeo
2010-04,101.176,27.52,0.0,75049.5636160714,Bokeo
2010-05,106.366,27.23,1.0,75049.5636160714,Bokeo
2010-06,235.044,26.01,5.0,75049.5636160714,Bokeo
2010-07,512.741,24.6,9.0,75049.5636160714,Bokeo
2010-08,483.291,23.8,8.0,75049.5636160714,Bokeo
2010-09,282.196,23.74,27.0,75049.5636160714,Bokeo
2010-10,145.437,22.68,22.0,75049.5636160714,Bokeo
2010-11,6.601,20.44,18.0,75049.5636160714,Bokeo
2010-12,45.062,19.31,4.0,75049.5636160714,Bokeo
2011-01,29.189,18.16,1.0,76704.2431640625,Bokeo
2011-02,7.948,21.24,1.0,76704.2431640625,Bokeo
2011-03,95.081,21.08,1.0,76704.2431640625,Bokeo
2011-04,165.476,23.52,1.0,76704.2431640625,Bokeo
2011-05,309.719,23.51,3.0,76704.2431640625,Bokeo
2011-06,280.401,23.93,4.0,76704.2431640625,Bokeo
2011-07,395.965,24.14,6.0,76704.2431640625,Bokeo
2011-08,394.6,23.5,10.0,76704.2431640625,Bokeo
2011-09,346.165,23.28,8.0,76704.2431640625,Bokeo
2011-10,65.909,22.71,10.0,76704.2431640625,Bokeo
2011-11,11.921,20.2,6.0,76704.2431640625,Bokeo
2011-12,5.966,18.21,3.0,76704.2431640625,Bokeo
2012-01,48.604,18.88,2.0,78358.9227120536,Bokeo
2012-02,10.379,22.47,2.0,78358.9227120536,Bokeo
2012-03,45.608,24.01,1.0,78358.9227120536,Bokeo
2012-04,75.97,25.76,2.0,78358.9227120536,Bokeo
2012-05,372.717,24.78,5.0,78358.9227120536,Bokeo
2012-06,234.729,24.37,10.0,78358.9227120536,Bokeo
2012-07,434.478,23.16,17.0,78358.9227120536,Bokeo
2012-08,362.176,23.7,17.0,78358.9227120536,Bokeo
2012-09,242.424,23.69,26.0,78358.9227120536,Bokeo
2012-10,107.072,22.96,28.0,78358.9227120536,Bokeo
2012-11,119.215,22.14,18.0,78358.9227120536,Bokeo
2012-12,27.869,19.29,8.0,78358.9227120536,Bokeo
2010-01,99.579,22.4,110.0,558527.426339286,Vientiane[prefecture]
2010-02,4.529,25.48,93.0,558527.426339286,Vientiane[prefecture]
2010-03,4.093,27.4,68.0,558527.426339286,Vientiane[prefecture]
2010-04,66.746,29.71,87.0,558527.426339286,Vientiane[prefecture]
2010-05,252.105,28.47,116.0,558527.426339286,Vientiane[prefecture]
2010-06,316.23,27.41,193.0,558527.426339286,Vientiane[prefecture]
2010-07,455.367,26.57,852.0,558527.426339286,Vientiane[prefecture]
2010-08,580.987,25.58,1291.0,558527.426339286,Vientiane[prefecture]
2010-09,353.751,25.89,1117.0,558527.426339286,Vientiane[prefecture]
2010-10,157.159,24.69,606.0,558527.426339286,Vientiane[prefecture]
2010-11,1.463,23.48,212.0,558527.426339286,Vientiane[prefecture]
2010-12,12.176,22.25,36.0,558527.426339286,Vientiane[prefecture]
2011-01,2.258,19.5,23.0,568977.998046875,Vientiane[prefecture]
2011-02,5.528,23.89,18.0,568977.998046875,Vientiane[prefecture]
2011-03,105.513,22.54,26.0,568977.998046875,Vientiane[prefecture]
2011-04,69.588,26.93,31.0,568977.998046875,Vientiane[prefecture]
2011-05,248.2,26.33,67.0,568977.998046875,Vientiane[prefecture]
2011-06,412.335,26.27,89.0,568977.998046875,Vientiane[prefecture]
2011-07,540.268,26.08,115.0,568977.998046875,Vientiane[prefecture]
2011-08,354.639,25.67,197.0,568977.998046875,Vientiane[prefecture]
2011-09,486.031,25.25,163.0,568977.998046875,Vientiane[prefecture]
2011-10,152.014,24.64,205.0,568977.998046875,Vientiane[prefecture]
2011-11,13.382,24.24,120.0,568977.998046875,Vientiane[prefecture]
2011-12,1.713,20.33,55.0,568977.998046875,Vientiane[prefecture]
2012-01,18.871,21.82,51.0,579428.569754464,Vientiane[prefecture]
2012-02,11.774,24.68,36.0,579428.569754464,Vientiane[prefecture]
2012-03,28.807,26.42,29.0,579428.569754464,Vientiane[prefecture]
2012-04,129.881,27.8,46.0,579428.569754464,Vientiane[prefecture]
2012-05,377.271,26.67,94.0,579428.569754464,Vientiane[prefecture]
2012-06,263.954,26.6,209.0,579428.569754464,Vientiane[prefecture]
2012-07,394.475,25.74,351.0,579428.569754464,Vientiane[prefecture]
2012-08,426.776,25.73,359.0,579428.569754464,Vientiane[prefecture]
2012-09,232.189,26.08,542.0,579428.569754464,Vientiane[prefecture]
2012-10,58.911,26.4,573.0,579428.569754464,Vientiane[prefecture]
2012-11,36.796,26.24,369.0,579428.569754464,Vientiane[prefecture]
2012-12,6.881,23.19,169.0,579428.569754464,Vientiane[prefecture]
2010-01,18.568,22.22,1.0,685766.336495536,Savannakhet
2010-02,24.886,25.09,2.0,685766.336495536,Savannakhet
2010-03,7.228,26.05,19.0,685766.336495536,Savannakhet
2010-04,54.372,28.2,51.0,685766.336495536,Savannakhet
2010-05,186.318,27.86,37.0,685766.336495536,Savannakhet
2010-06,213.602,26.97,293.0,685766.336495536,Savannakhet
2010-07,237.73,26.5,803.0,685766.336495536,Savannakhet
2010-08,452.702,24.98,566.0,685766.336495536,Savannakhet
2010-09,219.88,25.38,569.0,685766.336495536,Savannakhet
2010-10,294.59,23.57,158.0,685766.336495536,Savannakhet
2010-11,62.321,22.16,24.0,685766.336495536,Savannakhet
2010-12,9.036,21.39,0.0,685766.336495536,Savannakhet
2011-01,8.166,18.19,14.0,694482.169433594,Savannakhet
2011-02,7.73,22.24,11.0,694482.169433594,Savannakhet
2011-03,30.255,21.13,16.0,694482.169433594,Savannakhet
2011-04,80.605,25.43,20.0,694482.169433594,Savannakhet
2011-05,184.776,26.09,43.0,694482.169433594,Savannakhet
2011-06,264.151,25.75,57.0,694482.169433594,Savannakhet
2011-07,279.663,25.33,74.0,694482.169433594,Savannakhet
2011-08,345.62,24.95,127.0,694482.169433594,Savannakhet
2011-09,407.913,23.96,105.0,694482.169433594,Savannakhet
2011-10,248.981,23.29,132.0,694482.169433594,Savannakhet
2011-11,60.231,23.13,77.0,694482.169433594,Savannakhet
2011-12,12.468,18.97,36.0,694482.169433594,Savannakhet
2012-01,33.55,20.43,33.0,703198.002371652,Savannakhet
2012-02,6.125,22.61,23.0,703198.002371652,Savannakhet
2012-03,50.848,24.59,19.0,703198.002371652,Savannakhet
2012-04,105.211,26.51,30.0,703198.002371652,Savannakhet
2012-05,233.339,26.52,60.0,703198.002371652,Savannakhet
2012-06,268.42,25.78,134.0,703198.002371652,Savannakhet
2012-07,286.584,25.25,225.0,703198.002371652,Savannakhet
2012-08,248.395,25.08,230.0,703198.002371652,Savannakhet
2012-09,205.193,25.07,348.0,703198.002371652,Savannakhet
2012-10,46.7,25.13,367.0,703198.002371652,Savannakhet
2012-11,22.827,25.5,237.0,703198.002371652,Savannakhet
2012-12,7.775,23.08,108.0,703198.002371652,Savannakhet
82 changes: 82 additions & 0 deletions tests/test_convert_request.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import json

import pandas as pd

from chap_core.cli_endpoints.convert import convert_request


def test_convert_request_from_example_json(tmp_path):
    """convert_request on the bundled example JSON yields a CSV/GeoJSON pair."""
    prefix = tmp_path / "output"
    convert_request("example_data/create-backtest-with-data.json", prefix)

    csv_file = tmp_path / "output.csv"
    geojson_file = tmp_path / "output.geojson"
    assert csv_file.exists()
    assert geojson_file.exists()

    frame = pd.read_csv(csv_file)
    expected_columns = {"location", "time_period", "disease_cases", "rainfall", "mean_temperature"}
    assert expected_columns.issubset(frame.columns)
    assert not frame.empty

    geojson = json.loads(geojson_file.read_text())
    assert geojson["type"] == "FeatureCollection"
    assert len(geojson["features"]) > 0


def test_convert_request_pivots_correctly(tmp_path):
    """Long-format provided data is pivoted into one column per feature."""

    def _record(feature, org_unit, period, value):
        # One long-format data point as the Modeling App emits it.
        return {"featureName": feature, "orgUnit": org_unit, "period": period, "value": value}

    def _point(feature_id, coordinates):
        # Minimal GeoJSON Point feature with the given id.
        return {
            "id": feature_id,
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": coordinates},
            "properties": {},
        }

    request = {
        "providedData": [
            _record("rainfall", "loc_1", "2022-01", 1.5),
            _record("rainfall", "loc_1", "2022-02", 2.0),
            _record("disease_cases", "loc_1", "2022-01", 10),
            _record("disease_cases", "loc_1", "2022-02", 15),
            _record("rainfall", "loc_2", "2022-01", 3.0),
            _record("disease_cases", "loc_2", "2022-01", 20),
        ],
        "geojson": {
            "type": "FeatureCollection",
            "features": [_point("loc_1", [0, 0]), _point("loc_2", [1, 1])],
        },
    }

    input_path = tmp_path / "request.json"
    with open(input_path, "w") as f:
        json.dump(request, f)

    convert_request(input_path, tmp_path / "result")

    frame = pd.read_csv(tmp_path / "result.csv")
    assert set(frame.columns) == {"location", "time_period", "disease_cases", "rainfall"}
    # Two periods for loc_1 plus one period for loc_2.
    assert len(frame) == 3

    loc1_jan = frame[(frame["location"] == "loc_1") & (frame["time_period"] == "2022-01")].iloc[0]
    assert loc1_jan["rainfall"] == 1.5
    assert loc1_jan["disease_cases"] == 10

    geojson = json.loads((tmp_path / "result.geojson").read_text())
    assert len(geojson["features"]) == 2
1 change: 1 addition & 0 deletions tests/test_documentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"docs/feature_tutorials/extended_predictor.md",
"docs/chap-cli/evaluation-workflow.md",
# Workshop tutorials (instructional content, not testable code)
# Note: 11_feb_presession.md is tested in test_documentation_slow.py
"docs/kigali-workshop",
]

Expand Down
Loading