Skip to content

Commit 21d6a2f

Browse files
Add functions to calculate parallel speedup and parallel efficiency.
1 parent 3d47087 commit 21d6a2f

File tree

2 files changed

+125
-0
lines changed

2 files changed

+125
-0
lines changed

src/access/profiling/scaling.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""Functions to calculate metrics related to parallel scaling of applications."""
5+
6+
import xarray as xr
7+
8+
from access.profiling.metrics import ProfilingMetric
9+
10+
11+
def parallel_speedup(stats: xr.Dataset, metric: ProfilingMetric) -> xr.DataArray:
    """Compute the parallel speedup of a metric relative to the smallest CPU count.

    The speedup is the value of the metric at the smallest ``ncpus`` coordinate divided by
    the value of the metric at every ``ncpus`` coordinate.

    Args:
        stats (Dataset): Scaling data, stored as a xarray dataset indexed by ``ncpus``.
        metric (ProfilingMetric): Metric to use for the speedup calculation.

    Returns:
        DataArray: Parallel speedup, named "speedup".

    Raises:
        ValueError: If metric units are not time (e.g., seconds).
    """
    timings = stats[metric]
    if timings.pint.dimensionality != "[time]":
        raise ValueError("Metric units must be time (e.g., seconds)!")

    # The reference timing is the one measured with the fewest CPUs.
    fewest_cpus = stats["ncpus"].min()
    reference = timings.sel(ncpus=fewest_cpus)

    result = reference / timings
    result.name = "speedup"
    return result
28+
29+
30+
def parallel_efficiency(stats: xr.Dataset, metric: ProfilingMetric) -> xr.DataArray:
    """Compute the parallel efficiency of a metric, expressed in percent.

    The efficiency is the parallel speedup scaled by the ratio between the smallest
    ``ncpus`` coordinate and each ``ncpus`` coordinate.

    Args:
        stats (Dataset): Scaling data, stored as a xarray dataset indexed by ``ncpus``.
        metric (ProfilingMetric): Metric to use for the efficiency calculation.

    Returns:
        DataArray: Parallel efficiency in percent, named "parallel efficiency".

    Raises:
        ValueError: Propagated from ``parallel_speedup`` if metric units are not time.
    """
    speedup = parallel_speedup(stats, metric)

    # Ratio of the smallest CPU count to each CPU count.
    cpu_counts = speedup.ncpus
    cpu_ratio = cpu_counts.min() / cpu_counts

    efficiency = (speedup * cpu_ratio).pint.to("percent")
    efficiency.name = "parallel efficiency"
    return efficiency

tests/test_scaling.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import pint
2+
import pytest
3+
import xarray as xr
4+
5+
from access.profiling.metrics import count, tavg
6+
from access.profiling.scaling import parallel_efficiency, parallel_speedup
7+
8+
9+
@pytest.fixture()
def simple_scaling_data():
    """Fixture instantiating a dataset containing scaling data.

    The mock data contains two regions, "Region 1" and "Region 2", and two metrics, count and tavg.
    The count values are always [1, 2], while tavg depends on the number of CPUs and stops
    improving beyond 2 CPUs:
        - For 1 CPU: [600365 s, 2.345388 s]
        - For 2 CPUs: [300182.5 s, 1.172694 s]
        - For 4 CPUs: [300182.5 s, 1.172694 s]
    """
    single_cpu_times = [600365, 2.345388]
    per_run = [
        xr.Dataset(
            data_vars={
                count: xr.DataArray([[1, 2]], dims=["ncpus", "region"]),
                tavg: xr.DataArray(
                    # Timings are divided by at most 2, so the 4-CPU run matches the 2-CPU run.
                    [[seconds / min(n, 2) for seconds in single_cpu_times]],
                    dims=["ncpus", "region"],
                ).pint.quantify("seconds"),
            },
            coords={"region": ["Region 1", "Region 2"], "ncpus": [n]},
        )
        for n in [1, 2, 4]
    ]
    return xr.concat(per_run, dim="ncpus")
34+
35+
36+
def test_parallel_speedup(simple_scaling_data):
    """Check the metadata and values of the speedup computed from the mock scaling data."""
    result = parallel_speedup(simple_scaling_data, tavg)

    assert result.shape == (2, 3)
    assert result.name == "speedup"
    assert str(result.pint.units) == "dimensionless"
    assert result.attrs == {}
    assert list(result.coords) == ["region", "ncpus"]
    assert list(result["ncpus"].values) == [1, 2, 4]
    assert list(result["region"].values) == ["Region 1", "Region 2"]

    # Dequantify to remove warnings when getting values
    magnitudes = result.pint.dequantify()

    # Both regions share the same expected speedup for each CPU count.
    expected_by_ncpus = {1: 1.0, 2: 2.0, 4: 2.0}
    for region in ("Region 1", "Region 2"):
        for ncpus, expected in expected_by_ncpus.items():
            assert magnitudes.sel(ncpus=ncpus, region=region).values == pytest.approx(expected)
54+
55+
56+
def test_parallel_efficiency(simple_scaling_data):
    """Check the metadata and values of the efficiency computed from the mock scaling data."""
    eff = parallel_efficiency(simple_scaling_data, tavg)

    assert eff.shape == (2, 3)
    assert eff.name == "parallel efficiency"
    assert str(eff.pint.units) == "percent"
    assert eff.attrs == {}
    assert list(eff.coords) == ["region", "ncpus"]
    assert list(eff["ncpus"].values) == [1, 2, 4]
    assert list(eff["region"].values) == ["Region 1", "Region 2"]

    # Dequantify to remove warnings when getting values
    eff = eff.pint.dequantify()

    # After dequantifying, the values are plain magnitudes in percent (the unit itself is
    # asserted above), so they are compared against plain numbers rather than quantities.
    expected_percent_by_ncpus = {1: 100.0, 2: 100.0, 4: 50.0}
    for region in ("Region 1", "Region 2"):
        for ncpus, expected in expected_percent_by_ncpus.items():
            assert eff.sel(ncpus=ncpus, region=region).values == pytest.approx(expected)
76+
77+
78+
def test_incorrect_units(simple_scaling_data):
    """Check that a metric whose units are not time is rejected with a ValueError."""
    # Match on the message so that an unrelated ValueError cannot satisfy the test.
    with pytest.raises(ValueError, match="Metric units must be time"):
        parallel_speedup(simple_scaling_data, count)

0 commit comments

Comments
 (0)