From 8dc806a08983a7bce4bbe7a668b8ace4529d854b Mon Sep 17 00:00:00 2001
From: Adam Zsarnoczay <33822153+zsarnoczay@users.noreply.github.com>
Date: Tue, 12 Aug 2025 16:06:17 -0700
Subject: [PATCH 1/2] Extend rWHALE to only look for requested results.

rWHALE used to blindly try to open every type of result file, collect
the information, and only then check what to save in the output.
Instead, this commit adds code that checks which outputs are requested
and uses the existing out_types argument of aggregate_results to ask
for only that subset of output types. When no outputs are specified,
all output types are still requested.
---
 modules/Workflow/rWHALE.py     | 16 +++++++++++++++-
 modules/Workflow/whale/main.py |  3 +--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/modules/Workflow/rWHALE.py b/modules/Workflow/rWHALE.py
index 30da36e6a..1e9b9a2ab 100644
--- a/modules/Workflow/rWHALE.py
+++ b/modules/Workflow/rWHALE.py
@@ -266,6 +266,16 @@ def main(  # noqa: C901, D103
             comm.Barrier()
 
         # aggregate results
+        if inputs.get('outputs', False):
+            requested_outputs = []
+            for output_type in ['AIM', 'EDP', 'DM', 'DV']:
+                if inputs['outputs'].get(output_type, False):
+                    if inputs['outputs'][output_type]:
+                        requested_outputs.append(output_type)
+        else:
+            requested_outputs = ['AIM', 'EDP', 'DM', 'DV']
+        requested_outputs.append('every_realization')
+
         if (
             asset_type == 'Buildings'  # noqa: PLR1714
             or asset_type == 'TransportationNetwork'
@@ -273,7 +283,11 @@ def main(  # noqa: C901, D103
             or asset_type == 'PowerNetwork'
         ):
             if procID == 0:
-                WF.aggregate_results(asst_data=asst_data, asset_type=asset_type)
+                WF.aggregate_results(
+                    asst_data=asst_data, 
+                    asset_type=asset_type,
+                    out_types = requested_outputs
+                    )
 
         elif asset_type == 'WaterNetworkPipelines':
             # Provide the headers and out types
diff --git a/modules/Workflow/whale/main.py b/modules/Workflow/whale/main.py
index 1d264787f..34a98ff8f 100644
--- a/modules/Workflow/whale/main.py
+++ b/modules/Workflow/whale/main.py
@@ -2944,8 +2944,7 @@ def aggregate_results(  # noqa: C901, PLR0912, PLR0915
         self,
         asst_data,
         asset_type='',
-        # out_types = ['IM', 'BIM', 'EDP', 'DM', 'DV', 'every_realization'],
-        out_types=['AIM', 'EDP', 'DMG', 'DV', 'every_realization'],  # noqa: B006
+        out_types=['AIM', 'EDP', 'DM', 'DV', 'every_realization'],  # noqa: B006
         headers=None,
     ):
         """Short description

From a624b6d68422e5df312bd1f3505158c471ff66e2 Mon Sep 17 00:00:00 2001
From: Adam Zsarnoczay <33822153+zsarnoczay@users.noreply.github.com>
Date: Tue, 12 Aug 2025 16:11:29 -0700
Subject: [PATCH 2/2] Bugfix in rWHALE's aggregate loss output

The aggregate_results function was aggregating losses across components
for each asset. This is unnecessary since it is already done by Pelicun
earlier in the workflow. Aggregation can be more complex than simply
summing up losses for each component. So, it is more appropriate to use
the DL_summary_stats file directly and rely on Pelicun to take care of
the appropriate aggregation.

This commit modifies the code to drop the old functionality and utilize
DL_summary_stats.

This commit also fixes a minor bug in output naming. The
R2Dres_mean_RepairCost key is used only for results that are scaled
with the replacement cost. Results that are not scaled use the
R2Dres_mean_RepairCost_{cost_unit} naming. Consequently, when a method
returns loss ratios and a replacement cost is available, rWHALE
returns both the loss ratios and the absolute losses.
---
 modules/Workflow/whale/main.py | 95 +++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 43 deletions(-)

diff --git a/modules/Workflow/whale/main.py b/modules/Workflow/whale/main.py
index 34a98ff8f..785254c56 100644
--- a/modules/Workflow/whale/main.py
+++ b/modules/Workflow/whale/main.py
@@ -3126,7 +3126,7 @@ def aggregate_results(  # noqa: C901, PLR0912, PLR0915
                             # deter_pointer[asset_id].update({
                             #     "R2Dres":r2d_res_i
                             # })
-                if 'DMG' in out_types:
+                if 'DM' in out_types:
                     dmg_out_file_i = 'DMG_grp.json'
 
                     if dmg_out_file_i not in os.listdir(asset_dir):
@@ -3188,16 +3188,25 @@ def aggregate_results(  # noqa: C901, PLR0912, PLR0915
 
                 if 'DV' in out_types:
                     dv_out_file_i = 'DV_repair_grp.json'
+                    dl_summary_file = 'DL_summary_stats.json'
 
                     if dv_out_file_i not in os.listdir(asset_dir):
                         show_warning(
                             f"Couldn't find DV file for {assetTypeHierarchy[-1]} {asset_id}"
                         )
 
+                    elif dl_summary_file not in os.listdir(asset_dir):
+                        show_warning(
+                            f"Couldn't find DL summary file for {assetTypeHierarchy[-1]} {asset_id}"
+                        )
+
                     else:
                         with open(asset_dir / dv_out_file_i, encoding='utf-8') as f:  # noqa: PTH123
                             dv_data_i = json.load(f)
 
+                        with open(asset_dir / dl_summary_file, encoding='utf-8') as f:  # noqa: PTH123
+                            dl_summary = json.load(f)
+
                         # extract DV unit info
                         dv_units = dv_data_i['Units']
                         del dv_data_i['Units']
@@ -3243,48 +3252,48 @@ def aggregate_results(  # noqa: C901, PLR0912, PLR0915
 
                         if 'DV' in R2D_res_out_types:
                             r2d_res_dv = dict()  # noqa: C408
-                            cost_columns = [
-                                col
-                                for col in dv_data_i.columns
-                                if col.startswith('Cost')
-                            ]
-                            if len(cost_columns) != 0:
-                                cost_data = dv_data_i[cost_columns].mean()
-                                cost_data_std = dv_data_i[cost_columns].std()
-                                cost_key = cost_data.idxmax()
-                                meanKey = (  # noqa: N806
-                                    f'R2Dres_mean_RepairCost_{dv_units[cost_key]}'
-                                )
-                                stdKey = (  # noqa: N806
-                                    f'R2Dres_std_RepairCost_{dv_units[cost_key]}'
-                                )
-                                r2d_res_dv.update(
-                                    {
-                                        meanKey: cost_data[cost_key],
-                                        stdKey: cost_data_std[cost_key],
-                                    }
-                                )
-                            time_columns = [
-                                col
-                                for col in dv_data_i.columns
-                                if col.startswith('Time')
-                            ]
-                            if len(time_columns) != 0:
-                                time_data = dv_data_i[time_columns].mean()
-                                time_data_std = dv_data_i[time_columns].std()
-                                time_key = time_data.idxmax()
-                                meanKey = (  # noqa: N806
-                                    f'R2Dres_mean_RepairTime_{dv_units[time_key]}'
-                                )
-                                stdKey = (  # noqa: N806
-                                    f'R2Dres_std_RepairTime_{dv_units[time_key]}'
-                                )
-                                r2d_res_dv.update(
-                                    {
-                                        meanKey: time_data[time_key],
-                                        stdKey: time_data_std[time_key],
-                                    }
-                                )
+
+                            if 'repair_cost' in dl_summary:
+                                repair_cost_data = dl_summary['repair_cost']
+
+                            elif 'repair_cost-' in dl_summary:
+                                repair_cost_data = dl_summary['repair_cost-']
+
+                            else:
+                                repair_cost_data = None
+
+                            if repair_cost_data:
+
+                                cost_unit = [unit for dv_output, unit in dv_units.items() if dv_output.startswith('Cost')][0]                            
+
+                                r2d_res_dv.update({
+                                    f'R2Dres_mean_RepairCost_{cost_unit}': repair_cost_data['mean'],
+                                    f'R2Dres_std_RepairCost_{cost_unit}': repair_cost_data['std']                                  
+                                    })
+
+                                if cost_unit == 'loss_ratio' and np.abs(replacement_cost-1.0)>1e-5:
+                                    r2d_res_dv.update({
+                                        f'R2Dres_mean_RepairCost': repair_cost_data['mean'] * replacement_cost,
+                                        f'R2Dres_std_RepairCost': repair_cost_data['std'] * replacement_cost                                    
+                                        })
+
+                            if 'repair_time' in dl_summary:
+                                repair_time_data = dl_summary['repair_time']
+
+                            elif 'repair_time-sequential' in dl_summary:
+                                repair_time_data = dl_summary['repair_time-sequential']
+
+                            else:
+                                repair_time_data = None
+
+                            if repair_time_data:
+
+                                time_unit = [unit for dv_output, unit in dv_units.items() if dv_output.startswith('Time')][0]
+
+                                r2d_res_dv.update({
+                                    f'R2Dres_mean_RepairTime_{time_unit}': repair_time_data['mean'],
+                                    f'R2Dres_std_RepairTime_{time_unit}': repair_time_data['std']
+                                    })
 
                             r2d_res_i = deter_pointer[asset_id].get('R2Dres', {})
                             r2d_res_i.update(r2d_res_dv)