NetherlandsForensicInstitute · jokkalt · Jan 6, 2026 · Dec 11, 2025 · Dec 11, 2025 · Dec 11, 2025
diff --git a/packages/scratch-core/src/conversion/data_formats.py b/packages/scratch-core/src/conversion/data_formats.py
@@ -1,8 +1,9 @@
 from enum import Enum, auto
+from typing import Any
 
 from pydantic import Field, computed_field
 
-from container_models.base import ConfigBaseModel
+from container_models.base import ConfigBaseModel, BaseModel
 from container_models.scan_image import ScanImage
 
 
@@ -44,13 +45,31 @@ class CropType(Enum):
     POLYGON = auto()
 
 
+class CropInfo(BaseModel):
+    """
+    Representation of the cropped area. Parameter `is_foreground` indicates whether to keep or delete the
+    selected area.
+
+    The `data` dict differs per CropType:
+    CIRCLE: {'center': array [x, y], 'radius': float}
+    RECTANGLE: {'corner': ScanMap2DArray}
+    POLYGON: {'point': ScanMap2DArray}
+    ELLIPSE: {'center': array [x, y], 'majoraxis': float, 'minoraxis': float, 'angle_majoraxis': float}
+    """
+
+    data: dict[str, Any]
+    crop_type: CropType
+    is_foreground: bool
+
+
 class Mark(ConfigBaseModel):
     """
     Representation of a mark (impression or striation)
     """
 
     scan_image: ScanImage
     mark_type: MarkType
+    crop_infos: list[CropInfo]
     crop_type: CropType
     meta_data: dict = Field(default_factory=dict)
     _center: tuple[float, float] | None = None

diff --git a/packages/scratch-core/src/conversion/mask.py b/packages/scratch-core/src/conversion/mask.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import numpy as np
 
 from container_models.base import ScanMap2DArray, MaskArray
@@ -26,6 +28,7 @@ def mask_2d_array(
 def crop_to_mask(
     image: ScanMap2DArray,
     mask: MaskArray,
+    margin: Optional[int] = None,
-    margin: Optional[int] = None,
+    margin: int | None = None,
-    margin: Optional[int] = None,
+    margin: int | None = None,
 ) -> ScanMap2DArray:
     """
     Crops an image to the bounding box of a mask.
@@ -34,15 +37,19 @@ def crop_to_mask(
     :param mask: Binary mask
     :return: Cropped image containing only the masked region
     """
-    y_slice, x_slice = _determine_bounding_box(mask)
+    x_slice, y_slice = _determine_bounding_box(mask, margin)
     return image[y_slice, x_slice]
 
 
-def _determine_bounding_box(mask: MaskArray) -> tuple[slice, slice]:
+def _determine_bounding_box(
-def _determine_bounding_box(
+def determine_bounding_box(
-def _determine_bounding_box(
+def determine_bounding_box(
+    mask: MaskArray, margin: Optional[int] = None
-    mask: MaskArray, margin: Optional[int] = None
+    mask: MaskArray, margin: int | None = None
-    mask: MaskArray, margin: Optional[int] = None
+    mask: MaskArray, margin: int | None = None
+) -> tuple[slice, slice]:
     """
-    Determines the bounding box of non-zero values in a mask.
+    Determines the bounding box of non-zero values in a mask. If a margin is given, the bounding box will be expanded
+    (in case of a negative margin) or cropped (in case of a positive margin) by 2 * margin pixels along each axis.
 
     :param mask: Binary mask array
+    :param margin: Margin around the bounding box to either crop (positive) or extend (negative) the bounding box
     :return: Tuple of (y_slice, x_slice) for the bounding box
     """
     non_zero_coords = np.nonzero(mask)
@@ -51,7 +58,22 @@ def _determine_bounding_box(mask: MaskArray) -> tuple[slice, slice]:
 
     y_min, x_min = np.min(non_zero_coords, axis=1)
     y_max, x_max = np.max(non_zero_coords, axis=1)
-    return slice(x_min, x_max + 1), slice(y_min, y_max + 1)
+
+    if margin:
+        x_min = max(0, x_min + margin)
+        x_max = min(mask.shape[1], x_max - margin + 1)
+        y_min = max(0, y_min + margin)
+        y_max = min(mask.shape[0], y_max - margin + 1)
+    else:
+        x_max += 1
+        y_max += 1
+
+    if x_min >= x_max:
+        raise ValueError("Slice results in x_min >= x_max. Margin may be too large.")
+    if y_min >= y_max:
+        raise ValueError("Slice results in y_min >= y_max. Margin may be too large.")
+
+    return slice(x_min, x_max), slice(y_min, y_max)
 
 
 def mask_and_crop_2d_array(

diff --git a/packages/scratch-core/src/conversion/remove_needles.py b/packages/scratch-core/src/conversion/remove_needles.py
@@ -0,0 +1,163 @@
+import numpy as np
+
+from container_models.base import MaskArray
+from container_models.scan_image import ScanImage
+from conversion.mask import mask_2d_array
+from conversion.resample import resample_scan_image_and_mask
+from conversion.utils import unwrap_result
+from parsers import subsample_scan_image
+
+
+def remove_needles(
+    scan_image: ScanImage, mask: MaskArray, times_median: float = 15.0
+) -> ScanImage:
+    """
+    Remove needle artifacts (outliers) from depth measurement data using median filtering.
+
+    This function identifies and removes spike-like outliers in depth data by:
+    1. Applying median filtering to smooth the data
+    2. Computing residuals (difference between original and smoothed data)
+    3. Flagging points where residuals exceed a threshold based on the median absolute residual
+    4. Setting flagged outlier points to NaN
+
+    The function adapts its filtering strategy based on data size:
+    - For large datasets (>20 columns and >20 rows): applies the mask, then 2D median filtering with optional subsampling
+    - For small datasets (≤20 columns or ≤20 rows): filters the unmasked data with a reduced filter size
+
+    :param scan_image: Scan image to clean.
+    :param mask: Binary mask array.
+    :param times_median: Multiplier for the outlier threshold; it is scaled by 6 and applied to the median absolute residual.
+    :return: The cleaned scan image.
+    """
+    filter_size_moderated = 5
+    times_median = times_median * 6
+
+    # Check if this is a small strip of data
+    is_small_strip = scan_image.width <= 20 or scan_image.height <= 20
+
+    if not is_small_strip:
+        # Calculate subsampling factor for computational efficiency
+        # Goal: 70 μm filter diameter covered by `filter_size_moderated` samples (14 μm sampling for a size of 5)
+        subsample_factor = int(
+            np.ceil(70e-6 / filter_size_moderated / scan_image.scale_x)
+        )
+
+        # Apply mask and prepare data
+        scan_image_masked = ScanImage(
+            data=mask_2d_array(scan_image.data, mask),
+            scale_x=scan_image.scale_x,
+            scale_y=scan_image.scale_y,
+        )
+
+        if subsample_factor > 1:
+            scan_image_subsampled = unwrap_result(
+                subsample_scan_image(
+                    scan_image_masked, subsample_factor, subsample_factor
+                )
+            )
+            # Apply median filter (using nanmedian equivalent)
+            scan_image_subsampled_filtered = apply_median_filter(
+                scan_image_subsampled, filter_size_moderated
+            )
+            # Upsample back to original resolution
+            upsample_factors = (1 / subsample_factor, 1 / subsample_factor)
+            scan_image_filtered, _ = resample_scan_image_and_mask(
+                scan_image_subsampled_filtered,
+                factors=upsample_factors,
+                only_downsample=False,
+            )
+
+        else:
+            # Apply median filter (using nanmedian equivalent)
+            scan_image_filtered = apply_median_filter(
+                scan_image_masked, filter_size_moderated
+            )
+
+        residual_image = (
+            scan_image_masked.data
+            - scan_image_filtered.data[
+                : scan_image.data.shape[0], : scan_image.data.shape[1]
+            ]
+        )
+    else:
+        # For small strips: use 1D filtering with adjusted kernel size
+        # Convert 2D filter size to 1D equivalent: round(sqrt(5)) = 2, which apply_median_filter makes odd (3)
+        filter_size_adjusted = int(np.round(np.sqrt(filter_size_moderated)))
+
+        scan_image_filtered = apply_median_filter(scan_image, filter_size_adjusted)
+
+        # Handle transposition when the filtered image has a width of 1
+        if scan_image_filtered.width == 1:
+            residual_image = scan_image.data - scan_image_filtered.data.T
+        else:
+            residual_image = scan_image.data - scan_image_filtered.data
+
+    # Find outliers: points where |residual| > threshold
+    threshold = times_median * np.nanmedian(np.abs(residual_image))
+    indices_invalid = np.abs(residual_image) > threshold
-    indices_invalid = np.abs(residual_image) > threshold
+    mask_outliers = np.abs(residual_image) > threshold
-    indices_invalid = np.abs(residual_image) > threshold
+    mask_outliers = np.abs(residual_image) > threshold
+
+    # Remove outliers by setting them to NaN
+    scan_image_without_outliers = scan_image.data.copy()
+    scan_image_without_outliers[indices_invalid] = np.nan
+
+    return ScanImage(
+        data=scan_image_without_outliers,
+        scale_x=scan_image.scale_x,
+        scale_y=scan_image.scale_y,
+    )
+
+
+def apply_median_filter(scan_image: ScanImage, filter_size: int) -> ScanImage:
+    """
+    Apply a fast median filter that handles NaN values.
+
+    This function computes a median filter by creating shifted versions of the input
+    image and taking the median across all shifts. NaN values are ignored during
+    the median calculation.
+
+    Notes
+    -----
+    This implementation uses circular shifts to create a 3D array of all
+    neighborhood pixels, then computes the median along the third dimension.
+
+    :param scan_image: Scan image to filter
+    :param filter_size: Size of the median filter kernel (will be made odd if even)
+    :return: Median-filtered scan image with the same shape as the input `scan_image`
+    """
+    # Make sure the filter size is odd
+    if filter_size % 2 == 0:
+        filter_size = filter_size + 1
+
+    # Pad the matrix with a NaN border of half_filter_size pixels on all sides
+    pad_shape = (
+        scan_image.data.shape[0] + filter_size - 1,
+        scan_image.data.shape[1] + filter_size - 1,
+    )
+
+    input_image_border = np.ones(pad_shape) * np.nan
+    half_filter_size = (filter_size - 1) // 2
+    input_image_border[
+        half_filter_size:-half_filter_size, half_filter_size:-half_filter_size
+    ] = scan_image.data
+
+    # Create 3D array to hold all shifted versions
+    input_image_array = np.ones((*pad_shape, filter_size**2), dtype=np.float32)
+
+    # Fill the array with circularly shifted versions
+    image_count = 0
+    for kernel_rows in range(-half_filter_size, half_filter_size + 1):
+        for kernel_columns in range(-half_filter_size, half_filter_size + 1):
+            input_image_array[:, :, image_count] = np.roll(
+                input_image_border, shift=(kernel_rows, kernel_columns), axis=(0, 1)
+            )
+            image_count += 1
+
+    # Remove borders and compute median
+    output_image_no_border = input_image_array[
+        half_filter_size:-half_filter_size, half_filter_size:-half_filter_size, :
+    ]
+    output_image = np.nanmedian(output_image_no_border, axis=2)
+
+    return ScanImage(
+        data=output_image, scale_x=scan_image.scale_x, scale_y=scan_image.scale_y
+    )
diff --git a/packages/scratch-core/src/conversion/resample.py b/packages/scratch-core/src/conversion/resample.py
@@ -27,8 +27,8 @@ def resample_scan_image_and_mask(
     :param mask: Corresponding mask array.
     :param factors: The multipliers for the scale of the X- and Y-axis. The formula used is `new_scale = factor * old_scale`.
     :param target_scale: Target scale (in meters) when `factors` are not provided.
-    :param preserve_aspect_ratio: Whether to preserve the aspect ratio of the image.
     :param only_downsample: If True, only downsample data (default). If False, allow upsampling.
+    :param preserve_aspect_ratio: Whether to preserve the aspect ratio of the image.
     :returns: Resampled ScanImage and MaskArray
     """
     if not factors: