Merge pull request #40 from RyanAugust/dev

RyanAugust · web-flow · commit c42bf75f955a · 2025-11-07T14:49:38.000-05:00
Study result generation
diff --git a/README.md b/README.md
@@ -31,6 +31,14 @@ sim = simmm()
 mmm_input_df, channel_roi = sim.run_with_config(config=cfg)
 ```
 
+### Run via CLI
+
+A configuration file is required as input for this and should be passed as seen below. An output path can also be passed via `-o`, however when not passed the current working directory will be used.
+
+```bash
+pysimmm -i example_config.yaml -o .
+```
+
 ### Run by stages
 
 Alternatively you may run each of the stages independently, which allows for easier debugging and in-run adjustments based on the results of each stage. The order of the stages is reflected below **(without their inputs)**. Once you've run through every stage, results are available by calling the `sim.final_df` object (channel ROI results are stored as `sim.channel_roi`).
@@ -48,14 +56,16 @@ sim.calculate_channel_roi()
 sim.finalize_output()
 ```
 
-### Run via CLI
+### Geographic distribution
 
-A configuration file is required as input for this and should be passed as seen below. An output path can also be passed via `-o`, however when not passed the current working directory will be used.
+Marketing Mix Models may use geographic-grain data for the purposes of budget allocation or during the calibration phase. PySiMMMulator provides `geos` to facilitate the generation of randomized geographies as well as a distribution function to allocate synthetic data across the geographies.
 
-```bash
-pysimmm -i example_config.yaml -o .
-```
 
+### Study simulation
+
+`study` and `batch_study` are also provided to simplify simulating the outcomes of marketing studies, which are an important component of MMM calibration.
+
+Within this framework, study results are drawn from a normal distribution centered on the true value of a channel's effectiveness (defaulting to ROI within this package). Both `study` and `batch_study` provide the ability to pass bias and standard deviation parameters for stationary and non-stationary distributions—allowing users to replicate a diverse set of real-world measurement difficulties.
 ## Development
 
 Setting up a dev environment
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -10,10 +10,11 @@ Welcome to PySiMMMulator docs!
 
     Simulate <simulate.rst>
     Geos <geos.rst>
+    Studies <study.rst>
 
 Indices and tables
 ==================
 
 * :ref:`genindex`
 * :ref:`modindex`
-* :ref:`search`
+* :ref:`search`
diff --git a/docs/source/study.rst b/docs/source/study.rst
@@ -0,0 +1,10 @@
+.. toctree::
+   :maxdepth: 2
+
+Studies
+=======
+
+.. automodule:: pysimmmulator.study
+    :members:
+    :undoc-members:
+
diff --git a/src/pysimmmulator/VERSION b/src/pysimmmulator/VERSION
@@ -1 +1 @@
-0.4.4
+0.5.0
diff --git a/src/pysimmmulator/__init__.py b/src/pysimmmulator/__init__.py
@@ -2,10 +2,11 @@
 __author__ = "RyanAugust"
 __license__ = "MIT"
 __copyright__ = "Copyright 2025"
-__version__ = "0.4.4"
+__version__ = "0.5.0"
 
 import os
 
 from .simulate import simmm, multisimmm
 from .load_parameters import load_config, define_basic_params
 from .geos import geos, distribute_to_geos
+from .study import study, batch_study
diff --git a/src/pysimmmulator/study.py b/src/pysimmmulator/study.py
@@ -0,0 +1,128 @@
+"""Generation of calibration study results"""
+from typing import List, Optional, Dict
+import numpy as np 
+
+DEFAULT_STUDY_BIAS = 0.0 
+DEFAULT_STUDY_SCALE = 0.05
+
+class study:
+    """Object for generating study values from a normal distribution around the true channel roi"""
+    def __init__(self, channel_name:str, true_roi:float, random_seed:int=None, bias:float=DEFAULT_STUDY_BIAS, stdev:float=DEFAULT_STUDY_SCALE) -> None:
+        self.channel_name = channel_name
+        self._true_roi = true_roi
+        self.rng = self._create_random_factory(seed=random_seed)
+        self._bias = bias
+        self._stdev = stdev
+
+    @property
+    def roi(self) -> float: 
+        """Reports the true ROI of the channel set at initialization
+
+        Returns:
+            true_roi (float): the true ROI value for the channel."""
+        return self._true_roi
+    
+    def _create_random_factory(self, seed: int) -> np.random.Generator:
+        """Internal helper that serves as a central random number generator, 
+        and can be initialized with a seed to enable testing.
+        
+        Args:
+            seed (int): Optional seed value for random number generation
+        Returns:
+            rng (np.random.Generator): random number generator"""
+        rng = np.random.default_rng(seed=seed)
+        return rng
+
+    def update_bias(self, value:float) -> None:
+        """Updates the distribution bias to the passed value
+
+        Args:
+            value (float): value to set the distribution bias to
+        Returns:
+            None"""
+        self._bias = value
+    
+    def update_stdev(self, value:float) -> None:
+        """Updates the distribution stdev to the passed value
+
+        Args:
+            value (float): value to set the distribution stdev to
+        Returns:
+            None"""
+        self._stdev = value
+    
+    def update_roi(self, value:float) -> None:
+        """Updates the roi assigned to the channel as the passed value
+
+        Args:
+            value (float): value to set the channel roi to
+        Returns:
+            None"""
+        self._true_roi = value
+
+    def generate(self, count:int=1) -> 'np.array': 
+        """Provides a study 'result'
+        
+        Args:
+            count (int): number of study results to return (default is 1)
+        Returns:
+            study_results (iterable[float]): an array of study results """
+        return self.rng.normal(loc=self._true_roi + self._bias, scale=self._stdev, size=count)
+
+    def generate_dynamic(self, bias:list[float], stdev:list[float]) -> list:
+        """Provides study results with non-stationary distribution
+
+        Args:
+            bias (list[float]): iterable of bias values used to update the distribution per results
+            stdev (list[float]): iterable of stdev values used to update the distribution per results
+        Returns:
+            study_results (iterable[float]): an array of study results """
+        results = []
+        for b, z in zip(bias, stdev):
+            self.update_bias(b)
+            self.update_stdev(z)
+            results.append(self.generate()[0])
+        return results
+
+class batch_study:
+    """Object for generating study values across all channels"""
+    def __init__(self, channel_rois:dict, channel_distributions:dict[str, dict]=dict(), random_seed:int=None, bias:float=DEFAULT_STUDY_BIAS, stdev:float=DEFAULT_STUDY_SCALE) -> None:
+        self._study_hold = {k: study(channel_name=k, true_roi=v, random_seed=random_seed, bias=channel_distributions.get(k, {}).get("bias",bias), stdev=channel_distributions.get(k, {}).get("stdev",stdev)) for k, v in channel_rois.items()}
+
+    def generate(self, count:int=1) -> dict[str, 'np.array']:
+        """Produces study results for all of the registered channels
+
+        Args:
+            count (int): number of study results to return (default is 1)
+        Returns:
+            study_results (dict[str, iterable[float]]): lookup of channel name to its array of study results"""
+        return {k: v.generate(count) for k, v in self._study_hold.items()}
+
+    def generate_dynamic(self, universal_bias: Optional[List[float]] = None, universal_stdev: Optional[List[float]] = None, 
+                         channel_bias: Optional[dict[str, list[float]]]=None, channel_stdev: Optional[dict[str, list[float]]]=None) -> dict[str, list[float]]:
+        """Produces study results for all of the registered channels
+
+        Args:
+            universal_bias (List[float]): iterable of bias values used to update the distribution per results
+            universal_stdev (List[float]): iterable of stdev values used to update the distribution per results
+            channel_bias (dict[str, list[float]]): lookup of iterable of bias values used to update the distribution per results
+            channel_stdev (dict[str, list[float]]): lookup of iterable of stdev values used to update the distribution per results
+        Returns:
+            study_results (dict[str, list[float]]): lookup of channel name to its list of study results """
+        assert all(x is not None for x in [universal_bias, universal_stdev]) or all(x is not None for x in [channel_bias, channel_stdev]), "both Universal or both channel specs must be passed"
+        results = {channel: [] for channel in self._study_hold.keys()}
+        if all(x is not None for x in [universal_bias, universal_stdev]):
+            for b, z in zip(universal_bias, universal_stdev):
+                for channel, study in self._study_hold.items():
+                    study.update_bias(b)
+                    study.update_stdev(z)
+                    results[channel].append(study.generate()[0])
+            return results
+        if all(x is not None for x in [channel_bias, channel_stdev]):
+            for channel, study in self._study_hold.items():
+                for b, z in zip(channel_bias[channel], channel_stdev[channel]):
+                    study.update_bias(b)
+                    study.update_stdev(z)
+                    results[channel].append(study.generate()[0])
+            return results
+
diff --git a/tests/test_study.py b/tests/test_study.py
@@ -0,0 +1,30 @@
+from pysimmmulator import study, batch_study
+
+def test_single_channel_study():
+    google = study("search", 1.600)
+    assert 0.0 < google.generate()[0] < 3.20
+
+def test_dynamic_single_channel_study():
+    google = study("search", 1.600)
+    results = google.generate_dynamic([i/10.0 for i in range(5)], [i/10.0 for i in range(5)])
+    assert len(results) == 5 
+
+def test_multiple_channel_study():
+    channel_spec = {"google_search": 1.600, "youtube": 9.01}
+    batch = batch_study(channel_spec)
+    assert 0.0 < batch.generate()["youtube"][0] < 20.06
+
+def test_dynamic_multiple_channel_universal_study():
+    channel_spec = {"google_search": 1.600, "youtube": 9.01}
+    batch = batch_study(channel_spec)
+    results = batch.generate_dynamic(universal_bias=[i/10.0 for i in range(5)], universal_stdev=[i/10.0 for i in range(5)])
+    assert 0.0 < results["youtube"][0] < 20.06
+
+def test_dynamic_multiple_channel_study():
+    channel_spec = {"google_search": 1.600, "youtube": 9.01}
+    batch = batch_study(channel_spec)
+    results = batch.generate_dynamic(channel_bias={"google_search":[i/10.0 for i in range(5)],
+                                                   "youtube":[i/10.0 for i in range(5)]},
+                                     channel_stdev={"google_search":[i/10.0 for i in range(5)],
+                                                    "youtube":[i/10.0 for i in range(5)]})
+    assert 0.0 < results["youtube"][0] < 20.06