Merge pull request #84 from oliveira-caio/tier_modality

pollytur · web-flow · commit ee767fb34b82 · 2025-10-15T18:30:04.000+02:00
added TimeIntervalInterpolator and tests for it.
diff --git a/experanto/filters/common_filters.py b/experanto/filters/common_filters.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from experanto.interpolators import Interpolator
+from experanto.interpolators import SequenceInterpolator
 from experanto.intervals import (
     TimeInterval,
     find_complement_of_interval_array,
@@ -9,8 +9,9 @@
 
 
 def nan_filter(vicinity=0.05):
-
-    def implementation(device_: Interpolator):
+    def implementation(device_: SequenceInterpolator):
+        # Requires a SequenceInterpolator because time_delta is used below
+        # and the other interpolators do not provide it.
         time_delta = device_.time_delta
         start_time = device_.start_time
         end_time = device_.end_time
diff --git a/experanto/interpolators.py b/experanto/interpolators.py
@@ -58,9 +58,11 @@ def create(root_folder: str, cache_data: bool = False, **kwargs) -> "Interpolato
                 return SequenceInterpolator(root_folder, cache_data, **kwargs)
         elif modality == "screen":
             return ScreenInterpolator(root_folder, cache_data, **kwargs)
+        elif modality == "time_interval":
+            return TimeIntervalInterpolator(root_folder, cache_data, **kwargs)
         else:
             raise ValueError(
-                f"There is no interpolator for {modality}. Please use 'sequence' or 'screen' as modality."
+                f"There is no interpolator for {modality}. Please use 'sequence', 'screen' or 'time_interval' as modality."
             )
 
     def valid_times(self, times: np.ndarray) -> np.ndarray:
@@ -451,7 +453,6 @@ def interpolate(self, times: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
             if ((len(data.shape) == 2) or (data.shape[-1] == 3)) and (
                 len(data.shape) < 4
             ):
-
                 data = np.expand_dims(data, axis=0)
             idx_for_this_file = np.where(self._data_file_idx[idx] == u_idx)
             if self.rescale:
@@ -478,6 +479,97 @@ def rescale_frame(self, frame: np.array) -> np.array:
         )
 
 
+class TimeIntervalInterpolator(Interpolator):
+    """Report, per query time, which labeled intervals contain it.
+
+    Labels and their interval files come from the `labels` key of the
+    metadata returned by `load_meta()`.
+    """
+
+    def __init__(self, root_folder: str, cache_data: bool = False, **kwargs):
+        super().__init__(root_folder)
+        self.cache_data = cache_data
+
+        meta = self.load_meta()
+        self.meta_labels = meta["labels"]
+        self.start_time = meta["start_time"]
+        self.end_time = meta["end_time"]
+        self.valid_interval = TimeInterval(self.start_time, self.end_time)
+
+        # With caching enabled, every interval file is read once up front.
+        if self.cache_data:
+            self.labeled_intervals = {
+                label: self._load_intervals(filename)
+                for label, filename in self.meta_labels.items()
+            }
+
+    def _load_intervals(self, filename) -> np.ndarray:
+        """Load one interval file, identically for cached and uncached use."""
+        # Pickle stays disabled (the numpy default): unpickling file content
+        # can execute arbitrary code, and the cached path never allowed it.
+        return np.load(self.root_folder / filename)
+
+    def interpolate(self, times: np.ndarray) -> np.ndarray:
+        """
+        Flag, per label, which valid query times lie inside a labeled interval.
+
+        Intervals are half-open, [start, end): a time t matches when
+        start <= t < end, so the start is inclusive and the end is exclusive.
+
+        Parameters
+        ----------
+        times : np.ndarray
+            Time points to check against the labeled intervals.
+
+        Returns
+        -------
+        out : np.ndarray of bool, shape (n_valid_times, n_labels)
+            `out[i, j]` is True if the i-th valid time falls within any interval
+            of the j-th label (labels in `meta_labels` order), else False.
+
+        Notes
+        -----
+        - Each label's file holds rows that unpack to (start, end); rows with
+          start > end trigger a warning and are skipped.
+        - Times rejected by `self.valid_times` are dropped before matching, so
+          rows of `out` align with the retained times, not with `times`.
+        - A warning is issued when no valid times are queried or when a label
+          has no intervals.
+        """
+        valid_times = times[self.valid_times(times)]
+        n_labels = len(self.meta_labels)
+
+        if len(valid_times) == 0:
+            warnings.warn(
+                "TimeIntervalInterpolator returns an empty array, no valid times queried."
+            )
+            return np.empty((0, n_labels), dtype=bool)
+
+        out = np.zeros((len(valid_times), n_labels), dtype=bool)
+        for i, (label, filename) in enumerate(self.meta_labels.items()):
+            if self.cache_data:
+                intervals = self.labeled_intervals[label]
+            else:
+                intervals = self._load_intervals(filename)
+
+            if len(intervals) == 0:
+                warnings.warn(
+                    f"TimeIntervalInterpolator found no intervals for label: {label}"
+                )
+                continue
+
+            for start, end in intervals:
+                if start > end:
+                    warnings.warn(
+                        f"Invalid interval found for label: {label}, interval: ({start}, {end})"
+                    )
+                    continue
+                # Half-open interval [start, end): inclusive start, exclusive end
+                out[(valid_times >= start) & (valid_times < end), i] = True
+
+        return out
+
+
 class ScreenTrial:
     def __init__(
         self,
@@ -548,7 +640,6 @@ def __init__(self, data_file_name, meta_data, cache_data: bool = False) -> None:
 
 class BlankTrial(ScreenTrial):
     def __init__(self, data_file_name, meta_data, cache_data: bool = False) -> None:
-
         self.interleave_value = meta_data.get("interleave_value")
 
         super().__init__(
@@ -567,7 +658,6 @@ def get_data_(self) -> np.array:
 
 class InvalidTrial(ScreenTrial):
     def __init__(self, data_file_name, meta_data, cache_data: bool = False) -> None:
-
         self.interleave_value = meta_data.get("interleave_value")
 
         super().__init__(
diff --git a/experanto/intervals.py b/experanto/intervals.py
@@ -28,7 +28,9 @@ def intersect(self, times: np.ndarray) -> np.ndarray:
         return np.where((times >= self.start) & (times <= self.end))[0]
 
 
-def uniquefy_interval_array(interval_array: List[TimeInterval]) -> List[TimeInterval]:
+def uniquefy_interval_array(
+    interval_array: List[TimeInterval],
+) -> List[TimeInterval]:
     """
     Takes an array of TimeIntervals and returns a new array where no intervals overlap.
     If intervals overlap or are adjacent, they are merged into a single interval.
@@ -92,7 +94,6 @@ def find_intersection_between_two_interval_arrays(
 def find_intersection_across_arrays_of_intervals(
     intervals_array: List[List[TimeInterval]],
 ) -> TimeInterval:
-
     common_interval_array = intervals_array[0]
 
     for interval_array in intervals_array[1:]:
diff --git a/tests/create_time_intervals_data.py b/tests/create_time_intervals_data.py
@@ -0,0 +1,128 @@
+import shutil
+from contextlib import closing, contextmanager
+from pathlib import Path
+
+import numpy as np
+import yaml
+
+from experanto.interpolators import Interpolator
+
+TIME_INTERVAL_ROOT = Path("tests/time_interval_data")
+
+
+@contextmanager
+def create_time_interval_data(
+    duration=10.0,
+    sampling_rate=30.0,
+    test_intervals=None,
+    train_intervals=None,
+    validation_intervals=None,
+):
+    """
+    Write a temporary time-interval dataset and remove it on exit.
+
+    Files are written to `TIME_INTERVAL_ROOT`; that directory is removed when
+    the context exits, even if an error occurred.
+
+    Parameters
+    ----------
+    duration : float
+        Total duration of the recording in seconds; stored as `end_time`.
+    sampling_rate : float
+        Sampling rate in Hz for generating timestamps.
+    test_intervals : list of [start, end], optional
+        Intervals for the test label. Defaults to [[0.0, 2.0]].
+    train_intervals : list of [start, end], optional
+        Intervals for the train label. Defaults to [[2.0, 4.0], [6.0, 8.0]].
+    validation_intervals : list of [start, end], optional
+        Validation intervals. Defaults to [[4.0, 6.0], [8.0, duration]].
+
+    Yields
+    ------
+    timestamps : np.ndarray
+        `int(duration * sampling_rate)` evenly spaced values in [0, duration).
+    intervals_dict : dict
+        Maps each label name to its float64 interval array.
+    """
+    try:
+        TIME_INTERVAL_ROOT.mkdir(parents=True, exist_ok=True)
+
+        # Fall back to the default split for every label left unspecified.
+        fallback = {
+            "test": [[0.0, 2.0]],
+            "train": [[2.0, 4.0], [6.0, 8.0]],
+            "validation": [[4.0, 6.0], [8.0, duration]],
+        }
+        requested = {
+            "test": test_intervals,
+            "train": train_intervals,
+            "validation": validation_intervals,
+        }
+        chosen = {
+            name: fallback[name] if rows is None else rows
+            for name, rows in requested.items()
+        }
+
+        timestamps = np.linspace(
+            0.0, duration, int(duration * sampling_rate), endpoint=False
+        )
+
+        meta = {
+            "labels": {
+                "test": "test.npy",
+                "train": "train.npy",
+                "validation": "validation.npy",
+            },
+            "start_time": 0.0,
+            "end_time": duration,
+            "modality": "time_interval",
+        }
+        with open(TIME_INTERVAL_ROOT / "meta.yml", "w") as f:
+            yaml.dump(meta, f)
+
+        intervals_dict = {
+            name: np.array(rows, dtype=np.float64) for name, rows in chosen.items()
+        }
+        for name, array in intervals_dict.items():
+            np.save(TIME_INTERVAL_ROOT / meta["labels"][name], array)
+
+        yield timestamps, intervals_dict
+
+    finally:
+        shutil.rmtree(TIME_INTERVAL_ROOT, ignore_errors=True)
+
+
+@contextmanager
+def time_interval_data_and_interpolator(data_kwargs=None, interp_kwargs=None):
+    """
+    Yield generated interval data together with an interpolator built on it.
+
+    The files exist only inside the context: the interpolator is closed first,
+    then `create_time_interval_data` cleans up the data it wrote.
+
+    Parameters
+    ----------
+    data_kwargs : dict, optional
+        Keyword arguments forwarded to `create_time_interval_data`.
+    interp_kwargs : dict, optional
+        Keyword arguments forwarded to `Interpolator.create`.
+
+    Yields
+    ------
+    timestamps : np.ndarray
+        Timestamps produced by `create_time_interval_data`.
+    intervals_dict : dict
+        Label-to-intervals mapping produced by `create_time_interval_data`.
+    time_interp : Interpolator
+        Object returned by `Interpolator.create`; `closing` calls its
+        `close()` when the context exits.
+    """
+    data_kwargs, interp_kwargs = data_kwargs or {}, interp_kwargs or {}
+
+    with create_time_interval_data(**data_kwargs) as (timestamps, intervals_dict):
+        # Built inside the data context so the files exist when it is created.
+        interpolator = Interpolator.create(
+            "tests/time_interval_data", **interp_kwargs
+        )
+        with closing(interpolator) as time_interp:
+            yield timestamps, intervals_dict, time_interp
diff --git a/tests/test_time_intervals_interpolator.py b/tests/test_time_intervals_interpolator.py