Add confidence interval for relative lift.

marcolongfils · marcolongfils · commit 4e85e6e41f06 · 2022-10-11T09:31:51.000Z
PiperOrigin-RevId: 480275918
diff --git a/matched_markets/methodology/tbr_iroas.py b/matched_markets/methodology/tbr_iroas.py
@@ -23,7 +23,7 @@
 import pandas as pd
 
 
-class TBRiROAS(object):
+class TBRiROAS():
   """Time Based Regression geoexperiment methodology.
 
   This class estimates the incremental Return on Ad Spend (iROAS)
@@ -138,7 +138,7 @@ def summary(self,
       - estimate. The median estimate of iROAS.
       - precision. Distance between the (1-level)/tails and 0.5 quantiles.
       - lower. The value of the (1-level)/tails quantile.
-      - upper. If tails=2, the level/tails quantile, otherwise inf.
+      - upper. If tails=2, the 1 - 0.5 * (1 - level) quantile, otherwise inf.
       - probability. The probability that Delta > posterior_threshold.
       - level. Records the level parameter used to generate the report.
       - posterior_threshold. Records the posterior_threshold parameter.
@@ -158,6 +158,28 @@ def summary(self,
     else:
       periods = (self.periods.test,)
 
+    tail_probability = (1 - level) / tails
+
+    response_data = self.tbr_response.analysis_data.copy().reset_index()
+    observed_treatment_response = response_data.loc[
+        (response_data[self.df_names.group] == self.groups.treatment) &
+        (response_data['period'].isin(periods)), self.df_names.response].sum()
+
+    # Obtain the distributions of the causal effects on response
+    delta_response = self.tbr_response.causal_cumulative_distribution(time=-1)
+    # Simulate the incremental response and relative lift
+    sims_response = delta_response.rvs(nsims, random_state=random_state)
+    sims_relative_lift = sims_response / (
+        observed_treatment_response - sims_response)
+    relative_lift = np.median(sims_relative_lift)
+    relative_lift_lower = np.percentile(sims_relative_lift,
+                                        100 * tail_probability)
+    if tails == 1:
+      relative_lift_upper = np.inf
+    else:
+      relative_lift_upper = np.percentile(sims_relative_lift,
+                                          100 * (1.0 - tail_probability))
+
     # iROAS analysis assuming fixed costs.
     if self._is_fixed_cost_scenario():
       cost = np.sum(self.tbr_cost.causal_effect(periods))
@@ -168,28 +190,29 @@ def summary(self,
       report['incremental_cost'] = cost
       causal_effect = self.tbr_response.causal_effect(periods)
       report['incremental_response'] = np.sum(causal_effect)
+      report['incremental_response_lower'] = report['lower'] * cost
+      report['incremental_response_upper'] = report['upper'] * cost
+      report['relative_lift'] = relative_lift
+      report['relative_lift_lower'] = relative_lift_lower
+      report['relative_lift_upper'] = relative_lift_upper
       report['scenario'] = 'fixed'
       # Return the report, less the scale column.
       return report.drop('scale', axis=1)
 
     # iROAS analysis with variable costs modelled via TBR.
     else:
-      alpha = (1 - level)/tails
 
-      # Obtain the distributions of the two sets of causal effects
-      delta_response = self.tbr_response.causal_cumulative_distribution(time=-1)
+      # Obtain the distributions of the causal effects on cost
       # We know that causal costs only arose during the test period.
       delta_cost = self.tbr_cost.causal_cumulative_distribution(
           periods=(self.periods.test,),
           time=-1)
 
       # Simulate the iROAS
       sims_cost = delta_cost.rvs(nsims, random_state=random_state)
-      sims_response = delta_response.rvs(nsims, random_state=random_state)
       sims_iroas = sims_response / sims_cost
 
-      # This needs to be used twice.
-      ci_lower = np.percentile(sims_iroas, 100 * alpha)
+      ci_lower = np.percentile(sims_iroas, 100 * tail_probability)
 
       # Construct the report.
       causal_effect = self.tbr_cost.causal_effect(periods)
@@ -200,13 +223,22 @@ def summary(self,
       report['lower'] = ci_lower
       if tails == 1:
         report['upper'] = np.inf
+        report['incremental_response_upper'] = np.inf
       else:
-        report['upper'] = np.percentile(sims_iroas, 100 * (1 - alpha))
+        report['upper'] = np.percentile(sims_iroas,
+                                        100 * (1 - tail_probability))
+        report['incremental_response_upper'] = delta_response.ppf(
+            1 - tail_probability)
       report['probability'] = np.mean(sims_iroas > posterior_threshold)
       report['level'] = level
       report['posterior_threshold'] = posterior_threshold
       report['incremental_cost'] = delta_cost.kwds['loc']
       report['incremental_response'] = delta_response.kwds['loc']
+      report['incremental_response_lower'] = delta_response.ppf(
+          tail_probability)
+      report['relative_lift'] = relative_lift
+      report['relative_lift_lower'] = relative_lift_lower
+      report['relative_lift_upper'] = relative_lift_upper
       report['scenario'] = 'variable'
 
       return report
@@ -276,7 +308,7 @@ def estimate_pointwise_and_cumulative_effect(
                        f'got {metric}')
 
     periods = (self.periods.pre, self.periods.test, self.periods.cooldown)
-    alpha = (1 - level) / tails
+    tail_probability = (1 - level) / tails
 
     metric_data = metric_df.analysis_data.copy().reset_index()
 
@@ -321,11 +353,11 @@ def estimate_pointwise_and_cumulative_effect(
       delta_metric = metric_df.causal_cumulative_distribution()
       pointwise_difference = metric_df.causal_effect(
           periods).reset_index().rename(columns={0: 'metric'})
-      lower = np.diff(delta_metric.ppf(alpha), prepend=0)
+      lower = np.diff(delta_metric.ppf(tail_probability), prepend=0)
       lower = np.concatenate((pointwise_difference.loc[
           pointwise_difference['date'] < test_start_date,
           'metric'].values, lower))
-      upper = np.diff(delta_metric.ppf(1 - alpha), prepend=0)
+      upper = np.diff(delta_metric.ppf(1 - tail_probability), prepend=0)
       upper = np.concatenate((pointwise_difference.loc[
           pointwise_difference['date'] < test_start_date,
           'metric'].values, upper))
@@ -358,8 +390,8 @@ def estimate_pointwise_and_cumulative_effect(
       cumulative_effect_df = common_classes.EstimatedTimeSeriesWithConfidenceInterval(
           {
               'date': experiment_dates,
-              'lower': delta_metric.ppf(alpha),
-              'upper': delta_metric.ppf(1 - alpha),
+              'lower': delta_metric.ppf(tail_probability),
+              'upper': delta_metric.ppf(1 - tail_probability),
               'estimate': cumulative_effect
           })
 
diff --git a/matched_markets/methodology/tbrmmdiagnostics.py b/matched_markets/methodology/tbrmmdiagnostics.py
@@ -329,7 +329,7 @@ def bbtest(self) -> Optional[BBTestResult]:
       return None
 
     if self._bbtest is None:
-      a, _, sigma, resid = self.pretestfit
+      a, _, sigma, resid = self.pretestfit  # pytype: disable=attribute-error  # strict-namedtuple-checks
       # Failure to fit the regression implies failure of the test.
       if np.isnan(a):
         self._bbtest = BBTestResult(False, None, None)
@@ -434,7 +434,7 @@ def aatest(self) -> Optional[AATestResult]:
     diag.x = x[:-n_test]
     yt = y[-n_test:].mean()
     xt = x[-n_test:].mean()
-    estimate, cihw, sigma, _ = diag.tbrfit(xt, yt)
+    estimate, cihw, sigma, _ = diag.tbrfit(xt, yt)  # pytype: disable=attribute-error  # strict-namedtuple-checks
     bounds = (estimate - cihw, estimate + cihw)
     lower, upper = bounds
     if lower * upper < 0:
diff --git a/matched_markets/methodology/utils.py b/matched_markets/methodology/utils.py
@@ -201,7 +201,8 @@ def expand_time_windows(periods: List[TimeWindow]) -> List[pd.Timestamp]:
   """
   days_exclude = []
   for window in periods:
-    days_exclude += pd.date_range(window.first_day, window.last_day, freq='D')
+    days_exclude += pd.date_range(
+        window.first_day, window.last_day, freq='D').to_list()
 
   return list(set(days_exclude))
 
diff --git a/matched_markets/notebook/post_analysis_colab_for_tbrmm.ipynb b/matched_markets/notebook/post_analysis_colab_for_tbrmm.ipynb
@@ -378,15 +378,12 @@
         "          f'{results.posterior_threshold.values[0]}:' +\n",
         "          f' {results.probability.values[0]}')\n",
         "\n",
-        "    treatment_response = geox_data.loc[(geox_data[\"assignment\"] == 1) \u0026 (\n",
-        "        geox_data[\"period\"].isin(period_to_use[ind])), \"response\"].sum()\n",
-        "\n",
         "    print('\\nincremental cost = {}'.format(\n",
         "        human_readable_number(results.incremental_cost.values[0])))\n",
         "    print('\\nincremental response = {}'.format(\n",
         "        human_readable_number(results.incremental_response.values[0])))\n",
         "    print('\\nincremental response as % of treatment response = {:.2f}%\\n'.format(\n",
-        "          results.incremental_response.values[0] * 100 / treatment_response))"
+        "          results.relative_lift.values[0] * 100))"
       ]
     },
     {
diff --git a/matched_markets/tests/test_tbr_iroas.py b/matched_markets/tests/test_tbr_iroas.py
@@ -62,6 +62,8 @@ def setUp(self):
         key_group=self.key_group,
         key_period=self.key_period,
         key_date=self.key_date)
+    self.treatment_response = self.data.loc[(self.data[self.key_group] == 2) & (
+        self.data[self.key_period] == 1), self.key_response].sum()
 
   def testFixedCostIROASSummary(self):
     """Checks the TBR results for an holdback experiment."""
@@ -86,16 +88,25 @@ def testFixedCostIROASSummary(self):
     r_incr_resp = 147337.122
     r_incr_cost = 50000
     r_probability = 1.0
+    r_incr_response_lower = r_lower * r_incr_cost
+    r_lift = 0.173998
+    r_lift_lower = 0.165686
 
     # Summary values from python.
     iroas = self.iroas_model.summary(
         level=level, posterior_threshold=posterior_threshold, tails=tails)
     py_estimate = iroas['estimate'].iloc[0]
     py_precision = iroas['precision'].iloc[0]
     py_lower = iroas['lower'].iloc[0]
+    py_upper = iroas['upper'].iloc[0]
     py_incr_resp = iroas['incremental_response'].iloc[0]
     py_incr_cost = iroas['incremental_cost'].iloc[0]
     py_probability = iroas['probability'].iloc[0]
+    py_incr_resp_lower = iroas['incremental_response_lower'].iloc[0]
+    py_incr_resp_upper = iroas['incremental_response_upper'].iloc[0]
+    py_lift = iroas['relative_lift'].iloc[0]
+    py_lift_lower = iroas['relative_lift_lower'].iloc[0]
+    py_lift_upper = iroas['relative_lift_upper'].iloc[0]
 
     # Must do it like this as the R value is given with lower number of dps.
     order_estimate = utils.float_order(r_estimate - py_estimate)
@@ -104,14 +115,23 @@ def testFixedCostIROASSummary(self):
     order_iresp = utils.float_order(r_incr_resp - py_incr_resp)
     order_icost = utils.float_order(r_incr_cost - py_incr_cost)
     order_probability = utils.float_order(r_probability - py_probability)
-
+    order_iresp_lower = utils.float_order(r_incr_response_lower -
+                                          py_incr_resp_lower)
+    order_lift = utils.float_order(r_lift - py_lift)
+    order_lift_lower = utils.float_order(r_lift_lower - py_lift_lower)
     # Conduct the tests.
     self.assertLess(order_estimate, -5)
     self.assertLess(order_precision, -5)
     self.assertLess(order_lower, -5)
+    self.assertEqual(py_upper, np.inf)
     self.assertLess(order_iresp, -2)  # incremental_response is a larger number.
     self.assertLess(order_icost, -5)
     self.assertLess(order_probability, -5)
+    self.assertLessEqual(order_iresp_lower, -2)
+    self.assertEqual(py_incr_resp_upper, np.inf)
+    self.assertLessEqual(order_lift, -4)
+    self.assertLessEqual(order_lift_lower, -4)
+    self.assertEqual(py_lift_upper, np.inf)
 
   def testVariableCostIROASSummary(self):
     """Checks the TBR results for an go-dark/heavy-up experiment."""
@@ -134,14 +154,15 @@ def testVariableCostIROASSummary(self):
     tails = 1
 
     # Summary values from R, treated as constants.
-    # pylint: disable=invalid-name
-    R_ESTIMATE = 2.946742
-    R_PRECISION = 0.120548
-    R_LOWER = 2.826194
-    R_INCR_RESP = 147337.122
-    R_INCR_COST = 50000
-    R_PROBABILITY = 1.0
-    # pylint: enable=invalid-name
+    r_estimate = 2.946742
+    r_precision = 0.120548
+    r_lower = 2.826194
+    r_incr_resp = 147337.122
+    r_incr_cost = 50000
+    r_probability = 1.0
+    r_incr_resp_lower = r_lower * r_incr_cost
+    r_lift = 0.173998
+    r_lift_lower = 0.165686
 
     # Summary values from python. Specify random_state to make results
     # deterministic.
@@ -153,25 +174,40 @@ def testVariableCostIROASSummary(self):
     py_estimate = iroas['estimate'].iloc[0]
     py_precision = iroas['precision'].iloc[0]
     py_lower = iroas['lower'].iloc[0]
+    py_upper = iroas['upper'].iloc[0]
     py_incr_resp = iroas['incremental_response'].iloc[0]
     py_incr_cost = iroas['incremental_cost'].iloc[0]
     py_probability = iroas['probability'].iloc[0]
+    py_incr_resp_lower = iroas['incremental_response_lower'].iloc[0]
+    py_incr_resp_upper = iroas['incremental_response_upper'].iloc[0]
+    py_lift = iroas['relative_lift'].iloc[0]
+    py_lift_lower = iroas['relative_lift_lower'].iloc[0]
+    py_lift_upper = iroas['relative_lift_upper'].iloc[0]
 
     # Must do it like this as the R value is given with lower number of dps.
-    order_estimate = utils.float_order(R_ESTIMATE - py_estimate)
-    order_precision = utils.float_order(R_PRECISION - py_precision)
-    order_lower = utils.float_order(R_LOWER - py_lower)
-    order_iresp = utils.float_order(R_INCR_RESP - py_incr_resp)
-    order_icost = utils.float_order(R_INCR_COST - py_incr_cost)
-    order_probability = utils.float_order(R_PROBABILITY - py_probability)
-
+    order_estimate = utils.float_order(r_estimate - py_estimate)
+    order_precision = utils.float_order(r_precision - py_precision)
+    order_lower = utils.float_order(r_lower - py_lower)
+    self.assertEqual(py_upper, np.inf)
+    order_iresp = utils.float_order(r_incr_resp - py_incr_resp)
+    order_icost = utils.float_order(r_incr_cost - py_incr_cost)
+    order_probability = utils.float_order(r_probability - py_probability)
+    order_iresp_lower = utils.float_order(r_incr_resp_lower -
+                                          py_incr_resp_lower)
+    order_lift = utils.float_order(r_lift - py_lift)
+    order_lift_lower = utils.float_order(r_lift_lower - py_lift_lower)
     # Conduct the tests. Easier threshold as we added some noise.
     self.assertLess(order_estimate, -2)
     self.assertLess(order_precision, -2)
     self.assertLess(order_lower, -2)
     self.assertLess(order_iresp, -2)  # incremental_response is a larger number.
     self.assertLess(order_icost, -2)
     self.assertLess(order_probability, -2)
+    self.assertLessEqual(order_iresp_lower, -2)
+    self.assertEqual(py_incr_resp_upper, np.inf)
+    self.assertLessEqual(order_lift, -4)
+    self.assertLessEqual(order_lift_lower, -4)
+    self.assertEqual(py_lift_upper, np.inf)
 
   def testVariableCostIROASSummaryTwoTails(self):
     """Checks the TBR results when reporting two sided CI."""
@@ -194,15 +230,18 @@ def testVariableCostIROASSummaryTwoTails(self):
     tails = 2
 
     # Summary values from R, treated as constants.
-    # pylint: disable=invalid-name
-    R_ESTIMATE = 2.947012
-    R_PRECISION = 0.1557932
-    R_LOWER = 2.79135
-    R_UPPER = 3.102936
-    R_INCR_RESP = 147337.122
-    R_INCR_COST = 50000
-    R_PROBABILITY = 1.0
-    # pylint: enable=invalid-name
+    r_estimate = 2.947012
+    r_precision = 0.1557932
+    r_lower = 2.79135
+    r_upper = 3.102936
+    r_incr_resp = 147337.122
+    r_incr_cost = 50000
+    r_probability = 1.0
+    r_incr_resp_lower = r_lower * r_incr_cost
+    r_incr_resp_upper = r_upper * r_incr_cost
+    r_lift = 0.173998
+    r_lift_lower = 0.163273
+    r_lift_upper = 0.184885
 
     # Summary values from python. Specify random_state to make results
     # deterministic.
@@ -218,15 +257,31 @@ def testVariableCostIROASSummaryTwoTails(self):
     py_incr_resp = iroas['incremental_response'].iloc[0]
     py_incr_cost = iroas['incremental_cost'].iloc[0]
     py_probability = iroas['probability'].iloc[0]
-
+    py_incr_resp_lower = iroas['incremental_response_lower'].iloc[0]
+    py_incr_resp_upper = iroas['incremental_response_upper'].iloc[0]
+    py_lift = iroas['relative_lift'].iloc[0]
+    py_lift_lower = iroas['relative_lift_lower'].iloc[0]
+    py_lift_upper = iroas['relative_lift_upper'].iloc[0]
+
+    print(r_lift_lower)
+    print(py_lift_lower)
     # Must do it like this as the R value is given with lower number of dps.
-    order_estimate = utils.float_order(R_ESTIMATE - py_estimate)
-    order_precision = utils.float_order(R_PRECISION - py_precision)
-    order_lower = utils.float_order(R_LOWER - py_lower)
-    order_upper = utils.float_order(R_UPPER - py_upper)
-    order_iresp = utils.float_order(R_INCR_RESP - py_incr_resp)
-    order_icost = utils.float_order(R_INCR_COST - py_incr_cost)
-    order_probability = utils.float_order(R_PROBABILITY - py_probability)
+    order_estimate = utils.float_order(r_estimate - py_estimate)
+    order_precision = utils.float_order(r_precision - py_precision)
+    order_lower = utils.float_order(r_lower - py_lower)
+    order_upper = utils.float_order(r_upper - py_upper)
+    order_iresp = utils.float_order(r_incr_resp - py_incr_resp)
+    order_icost = utils.float_order(r_incr_cost - py_incr_cost)
+    order_probability = utils.float_order(r_probability - py_probability)
+    # Use the relative error (in percent) for the incremental response bounds,
+    # because R and Python use different RNGs.
+    order_iresp_lower = utils.float_order(
+        (r_incr_resp_lower - py_incr_resp_lower) * 100 / r_incr_resp_lower)
+    order_iresp_upper = utils.float_order(
+        (r_incr_resp_upper - py_incr_resp_upper) * 100 / r_incr_resp_upper)
+    order_lift = utils.float_order(r_lift - py_lift)
+    order_lift_lower = utils.float_order(r_lift_lower - py_lift_lower)
+    order_lift_upper = utils.float_order(r_lift_upper - py_lift_upper)
 
     # Conduct the tests. Easier threshold as we added some noise.
     self.assertLess(order_estimate, -2)
@@ -236,6 +291,11 @@ def testVariableCostIROASSummaryTwoTails(self):
     self.assertLess(order_iresp, -2)  # incremental_response is a larger number.
     self.assertLess(order_icost, -2)
     self.assertLess(order_probability, -2)
+    self.assertLessEqual(order_iresp_lower, -2)
+    self.assertLessEqual(order_iresp_upper, -2)
+    self.assertLessEqual(order_lift, -4)
+    self.assertLessEqual(order_lift_lower, -4)
+    self.assertLessEqual(order_lift_upper, -4)
 
   def testIROASSummaryWithCooldown(self):
     """Checks the TBR results when including the cooldown period in the analysis."""
diff --git a/matched_markets/tests/test_utils.py b/matched_markets/tests/test_utils.py