import pandas as pd
import numpy as np

from ods_tools.combine.common import oasis_float

# Output dtypes for the ALT (average loss table) built by generate_alt.
dtypes_al = {
    'groupset_id': 'i4',
    'SummaryId': 'i4',
    'LossType': 'i4',
    'Mean': oasis_float,
    'Std': oasis_float
}

dtypes_ep = {
    'groupset_id': 'i4',
    'SummaryId': 'i4',
    'EPCalc': 'i4',
    'EPType': 'i4',
    'RP': oasis_float,
    'Loss': oasis_float
}


def generate_alt(gplt, max_period):
    """Build the average loss table (ALT) from the gplt."""
    # TODO: mean loss sampling results in inf + NaN values
    aal_group = gplt.groupby(by=["groupset_id", "SummaryId", "LossType"], as_index=False)

    records = []
    for name, group in aal_group:
        # Mean loss across all periods; periods with no loss rows contribute zero to the sum.
        mean_loss = group["Loss"].sum() / max_period
        # Spread of the per-row losses around that mean, normalised by (max_period - 1).
        std_loss = np.sqrt(((mean_loss - group["Loss"]) ** 2).sum() / (max_period - 1))

        records.append({
            "groupset_id": name[0],
            "SummaryId": name[1],
            "LossType": name[2],
            "Mean": mean_loss,
            "Std": std_loss
        })

    return pd.DataFrame(records).astype(dtypes_al)
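# generate_alt illustration (values assumed, not from the source): for a group with
# losses [10.0, 30.0] and max_period = 4, Mean = 40 / 4 = 10.0 and
# Std = sqrt(((10 - 10)**2 + (10 - 30)**2) / 3) = sqrt(400 / 3) ≈ 11.55.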
def assign_exceedance_probability(df, max_period):
    # Rank losses from largest (rank 1) to smallest within each
    # (groupset_id, SummaryId, EPCalc) group, then convert the rank into a return period.
    original_cols = list(df.columns)
    df["rank"] = (df.groupby(by=["groupset_id", "SummaryId", "EPCalc"], as_index=False)["Loss"]
                  .rank(method="first", ascending=False))
    df["RP"] = max_period / df["rank"]
    return df[original_cols + ["RP"]]
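# Illustration (values assumed, not from the source): with max_period = 1000 and five
# losses in one (groupset_id, SummaryId, EPCalc) group, the largest loss is ranked 1
# and gets RP = 1000 / 1 = 1000, the next RP = 500, and the fifth RP = 200.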
def generate_ept(gplt, max_group_period, oep=True, aep=True):
    """Build the exceedance probability table (EPT) from the gplt."""
    # Sum losses to one row per event within each group period.
    ep_groups = (
        gplt.rename(columns={"LossType": "EPCalc"})  # check if this is the correct type
        .groupby(by=["groupset_id", "groupeventset_id",
                     "EventId", "GroupPeriod", "SummaryId",
                     "EPCalc"], as_index=False)
    )
    grouped_df = ep_groups["Loss"].agg("sum")
    # Regroup the per-event losses by group period for the per-period statistics below.
    grouped_df = grouped_df.groupby(by=["groupset_id", "SummaryId", "GroupPeriod", "EPCalc"], as_index=False)

    ep_frags = []
    if oep:
        # OEP: largest single event loss in each group period (EPType 1).
        oep_df = (
            grouped_df.pipe(lambda gp: gp["Loss"].max())
            .pipe(assign_exceedance_probability, max_period=max_group_period)
            .pipe(lambda x: x.assign(EPType=1))  # todo check OEP TVAR EPCalc 2
        )
        ep_frags.append(oep_df)

    if aep:
        # AEP: total loss across all events in each group period (EPType 3).
        aep_df = (
            grouped_df.pipe(lambda gp: gp["Loss"].sum())
            .pipe(assign_exceedance_probability, max_period=max_group_period)
            .pipe(lambda x: x.assign(EPType=3))  # todo check AEP TVAR EPCalc 4
        )
        ep_frags.append(aep_df)

    return (
        pd.concat(ep_frags)[["groupset_id", "SummaryId", "EPCalc", "EPType", "RP", "Loss"]]
        .astype(dtypes_ep)
        .sort_values(by=["groupset_id", "SummaryId", "EPType", "EPCalc", "Loss"],
                     ascending=[True, True, True, True, False])
    )
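A minimal usage sketch, assuming generate_alt and generate_ept are in scope (the module's
import path is not shown in this diff) and a gplt frame with the columns referenced above;
the values and the max_period of 5 are illustrative, not part of the change:

import pandas as pd

gplt = pd.DataFrame({
    "groupset_id": [1, 1, 1, 1],
    "groupeventset_id": [1, 1, 1, 1],
    "EventId": [101, 102, 103, 104],
    "GroupPeriod": [1, 1, 2, 3],
    "SummaryId": [1, 1, 1, 1],
    "LossType": [1, 1, 1, 1],
    "Loss": [100.0, 250.0, 75.0, 300.0],
})

alt = generate_alt(gplt, max_period=5)           # one Mean/Std row per summary and loss type
ept = generate_ept(gplt, max_group_period=5)     # OEP (EPType 1) and AEP (EPType 3) curves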