Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
bab246e
ruff
jokkalt Jan 6, 2026
575d3e5
Add returns dependency
snregales Dec 11, 2025
73f41f8
create railway logger decorator
snregales Dec 11, 2025
b57537d
Rewrite tests with the railway way containers in mind
snregales Dec 11, 2025
3ba3c82
Rewrite functions scratch-core with returns
snregales Dec 11, 2025
a9ec102
Create an abstract generic pipeline module
snregales Dec 11, 2025
c1bf7aa
Create some preprocessors pipelines
snregales Dec 11, 2025
aeedebf
Update preprocessors schemas
snregales Dec 11, 2025
48d92c6
Entegrate new pipeline structure to the pipeline
snregales Dec 11, 2025
315e0a3
Cleanup old/unused scratch core code
snregales Dec 11, 2025
ced309d
Remove unused api code
snregales Dec 11, 2025
fba3bd8
Make deptry happy
snregales Dec 11, 2025
6f44b47
remove surface normals
snregales Dec 12, 2025
df127e5
mask and crop functionality added (#67)
SimoneAriens Dec 17, 2025
b5fe028
Gaussian filter - filter_apply.m (#60)
SimoneAriens Dec 16, 2025
e03052f
Add map_level.m to translations (#66)
cfs-data Dec 17, 2025
3d2a00f
Resampling method added (#71)
SimoneAriens Dec 17, 2025
1252449
Feature/resampling mark (#72)
SimoneAriens Jan 5, 2026
cea1b14
eod
jokkalt Jan 6, 2026
73cffce
ruff
jokkalt Jan 7, 2026
95504b0
Some updates to resampling (#76)
cfs-data Jan 6, 2026
bb829d6
tests for rotation angle
jokkalt Jan 7, 2026
7cadb2f
rotation angle tests, fixtures and code
jokkalt Jan 7, 2026
871f4bc
added testfiles
jokkalt Jan 7, 2026
64d00d9
ruff
jokkalt Jan 7, 2026
e38dc16
some change
jokkalt Jan 8, 2026
872a04e
Merge branch 'main' into feature/rotate_cropped_image
jokkalt Jan 8, 2026
e29b9dd
lock file from main
jokkalt Jan 8, 2026
66579ef
rotate image; roughly
jokkalt Jan 8, 2026
39478f6
too many ruff errros
jokkalt Jan 8, 2026
b7a8acb
Merge branch 'main' into feature/rotate_cropped_image
jokkalt Jan 8, 2026
77bf026
small tests added
SimoneAriens Jan 9, 2026
0230eb9
start of cleanup artifacts
jokkalt Jan 12, 2026
d9c4e7f
Merge branch 'feature/rotate_cropped_image' of github.com:Netherlands…
jokkalt Jan 12, 2026
00e1277
cleanup artifacts simplified
jokkalt Jan 12, 2026
b887c02
updated tests and glued functions together. still buggy
jokkalt Jan 12, 2026
a4c1ea4
tests work, but fail:)
jokkalt Jan 13, 2026
12ed036
removed interpolation test case
jokkalt Jan 13, 2026
e9ca4ac
fixed rotation angle code
jokkalt Jan 13, 2026
edbee05
rotation_angle simplified with docstring
jokkalt Jan 13, 2026
2fbe663
More rotating and masking
jokkalt Jan 13, 2026
e453389
most recent changes
jokkalt Jan 14, 2026
09b84f6
bug fix
jokkalt Jan 14, 2026
7575030
new test cases
jokkalt Jan 14, 2026
aebe29e
test fix and rotate fix
jokkalt Jan 14, 2026
7f2a4b8
simplified
jokkalt Jan 14, 2026
ef7ed35
deleted matlab files
jokkalt Jan 14, 2026
8938242
numpy test files
jokkalt Jan 14, 2026
bf1dbb1
remove matlab file tests
jokkalt Jan 14, 2026
1c19a53
added numpy tests for matlab comparison
jokkalt Jan 14, 2026
930691b
cleanup old test files and lingering code/todos
jokkalt Jan 14, 2026
bc95474
Merge branch 'main' into feature/rotate_cropped_image
jokkalt Jan 14, 2026
5e98370
final test fixes
jokkalt Jan 14, 2026
f9598df
final test fixes
jokkalt Jan 14, 2026
7a65997
Updated rotate script and tests, updated determine bounding box
jokkalt Jan 15, 2026
742211b
added test_rotate and updated test_mask
jokkalt Jan 15, 2026
f05469d
restore gitattributes
jokkalt Jan 15, 2026
c6d8fa1
plural crop_infos for list
jokkalt Jan 15, 2026
2d751ff
last docstring changes
jokkalt Jan 15, 2026
9e223d7
Merge branch 'main' into feature/rotate_cropped_image
jokkalt Jan 15, 2026
1e753be
helper_function mess
jokkalt Jan 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion packages/scratch-core/src/conversion/data_formats.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from enum import Enum, auto
from typing import Any

from pydantic import Field, computed_field

from container_models.base import ConfigBaseModel
from container_models.base import ConfigBaseModel, BaseModel
from container_models.scan_image import ScanImage


Expand Down Expand Up @@ -44,13 +45,31 @@ class CropType(Enum):
POLYGON = auto()


class CropInfo(BaseModel):
"""
Representation of the cropped area. Parameter `is_foreground` indicates whether to keep or delete the
selected area.

The points dict differs per CropType:
CIRCLE: {'center': array [x, y], 'radius': float}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there any way we could enforce/test/validate this when creating a CropInfo?

RECTANGLE: {'corner': ScanMap2DArray}
POLYGON: {'point': ScanMap2DArray}
ELLIPSE: {'center': array [x, y], 'majoraxis': float, 'minoraxis': float, 'angle_majoraxis': float}
"""

data: dict[str, Any]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you can create a pydantic model per type and let pydantic figure out which one matches the parameters in data?

crop_type: CropType
is_foreground: bool


class Mark(ConfigBaseModel):
"""
Representation of a mark (impression or striation)
"""

scan_image: ScanImage
mark_type: MarkType
crop_infos: list[CropInfo]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use a tuple here instead of a list?

crop_type: CropType
meta_data: dict = Field(default_factory=dict)
_center: tuple[float, float] | None = None
Expand Down
30 changes: 26 additions & 4 deletions packages/scratch-core/src/conversion/mask.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional

import numpy as np

from container_models.base import ScanMap2DArray, MaskArray
Expand Down Expand Up @@ -26,6 +28,7 @@ def mask_2d_array(
def crop_to_mask(
image: ScanMap2DArray,
mask: MaskArray,
margin: Optional[int] = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
margin: Optional[int] = None,
margin: int | None = None,

) -> ScanMap2DArray:
"""
Crops an image to the bounding box of a mask.
Expand All @@ -34,15 +37,19 @@ def crop_to_mask(
:param mask: Binary mask
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add the new margin parameter to docstring

:return: Cropped image containing only the masked region
"""
y_slice, x_slice = _determine_bounding_box(mask)
x_slice, y_slice = _determine_bounding_box(mask, margin)
return image[y_slice, x_slice]


def _determine_bounding_box(mask: MaskArray) -> tuple[slice, slice]:
def _determine_bounding_box(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def _determine_bounding_box(
def determine_bounding_box(

mask: MaskArray, margin: Optional[int] = None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mask: MaskArray, margin: Optional[int] = None
mask: MaskArray, margin: int | None = None

) -> tuple[slice, slice]:
"""
Determines the bounding box of non-zero values in a mask.
Determines the bounding box of non-zero values in a mask. If a margin is given, the bounding box is expanded
(negative margin) or cropped (positive margin) by `margin` pixels on each side, clamped to the extent of the mask.

:param mask: Binary mask array
:param margin: Margin around the bounding box to either crop (positive) or extend (negative) the bounding box
:return: Tuple of (x_slice, y_slice) for the bounding box
Copy link
Collaborator

@cfs-data cfs-data Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Order of the tuple in return type here is not matching the output

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

switch as well

"""
non_zero_coords = np.nonzero(mask)
Expand All @@ -51,7 +58,22 @@ def _determine_bounding_box(mask: MaskArray) -> tuple[slice, slice]:

y_min, x_min = np.min(non_zero_coords, axis=1)
y_max, x_max = np.max(non_zero_coords, axis=1)
return slice(x_min, x_max + 1), slice(y_min, y_max + 1)

if margin:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if margin defaults to 0 then this is cleaner:

y_coords, x_coords = np.nonzero(mask)
y_min = max(0, y_coords.min() + margin)
y_max = min(mask.shape[0], y_coords.max() - margin + 1)
x_min = max(0, x_coords.min() + margin)
x_max = min(mask.shape[1], x_coords.max() - margin + 1)

x_min = max(0, x_min + margin)
x_max = min(mask.shape[1], x_max - margin + 1)
y_min = max(0, y_min + margin)
y_max = min(mask.shape[0], y_max - margin + 1)
else:
x_max += 1
y_max += 1

if x_min >= x_max:
raise ValueError("Slice results in x_min >= x_max. Margin may be too large.")
if y_min >= y_max:
raise ValueError("Slice results in y_min >= y_max. Margin may be too large.")

return slice(x_min, x_max), slice(y_min, y_max)


def mask_and_crop_2d_array(
Expand Down
163 changes: 163 additions & 0 deletions packages/scratch-core/src/conversion/remove_needles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import numpy as np

from container_models.base import MaskArray
from container_models.scan_image import ScanImage
from conversion.mask import mask_2d_array
from conversion.resample import resample_scan_image_and_mask
from conversion.utils import unwrap_result
from parsers import subsample_scan_image


def remove_needles(
scan_image: ScanImage, mask: MaskArray, times_median: float = 15.0
) -> ScanImage:
"""
Remove needle artifacts (outliers) from depth measurement data using median filtering.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are needle artifacts?

This function identifies and removes spike-like outliers in depth data by:
1. Applying median filtering to smooth the data
2. Computing residuals (difference between original and smoothed data)
3. Flagging points where residuals exceed a threshold based on median absolute deviation
4. Setting flagged outlier points to NaN
The function adapts its filtering strategy based on data size:
- For large datasets (>20 columns and rows): uses 2D median filtering with optional subsampling
- For small datasets (≤20 columns or rows): uses 1D median filtering with reduced filter size
:param scan_image: Scan image to clean.
:param mask: Binary mask array.
:param times_median: Multiplier for the outlier threshold; points whose absolute residual exceeds `6 * times_median` times the median absolute residual are flagged.
:return: The cleaned scan image.
"""
filter_size_moderated = 5
Copy link
Collaborator

@cfs-data cfs-data Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make this a module constant, or parameter with default value?

times_median = times_median * 6

# Check if this is a small strip of data
is_small_strip = scan_image.width <= 20 or scan_image.height <= 20
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change the magic value 20 into a module constant?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here you are checking for both width and height (as opposed to the matlab code) but further down you are assuming the width is the shortest?


if not is_small_strip:
# Calculate subsampling factor for computational efficiency
# Goal: a pixel size of roughly 70 μm / filter_size_moderated after subsampling, so the filter spans a 70 μm diameter
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"sampling" is not clear here, do you mean pixel size / scale?

subsample_factor = int(
np.ceil(70e-6 / filter_size_moderated / scan_image.scale_x)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Magic value should be module constant

)

# Apply mask and prepare data
scan_image_masked = ScanImage(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a mask_and_crop_scan_image method for this

data=mask_2d_array(scan_image.data, mask),
scale_x=scan_image.scale_x,
scale_y=scan_image.scale_y,
)

if subsample_factor > 1:
scan_image_subsampled = unwrap_result(
subsample_scan_image(
scan_image_masked, subsample_factor, subsample_factor
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use named keywords when there are multiple keywords

)
)
# Apply median filter (using nanmedian equivalent)
scan_image_subsampled_filtered = apply_median_filter(
scan_image_subsampled, filter_size_moderated
)
# Upsample back to original resolution
upsample_factors = (1 / subsample_factor, 1 / subsample_factor)
scan_image_filtered, _ = resample_scan_image_and_mask(
scan_image_subsampled_filtered,
factors=upsample_factors,
only_downsample=False,
)

else:
# Apply median filter (using nanmedian equivalent)
scan_image_filtered = apply_median_filter(
scan_image_masked, filter_size_moderated
)

residual_image = (
scan_image_masked.data
- scan_image_filtered.data[
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is scan_image_filtered containing NaNs in the same places as scan_image_masked?

: scan_image.data.shape[0], : scan_image.data.shape[1]
]
)
else:
# For small strips: use 1D filtering with adjusted kernel size
Copy link
Collaborator

@cfs-data cfs-data Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is now very long. I think it would be beneficial to split the function up into smaller functions with single responsibilities, and only create a small pipeline / logical flow here

# Convert 2D filter size to 1D equivalent: round(sqrt(5)) = 2 (apply_median_filter bumps even sizes to the next odd value)
filter_size_adjusted = int(np.round(np.sqrt(filter_size_moderated)))

scan_image_filtered = apply_median_filter(scan_image, filter_size_adjusted)

# Handle transposition for single-row data
if scan_image_filtered.width == 1:
residual_image = scan_image.data - scan_image_filtered.data.T
else:
residual_image = scan_image.data - scan_image_filtered.data

# Find outliers: points where |residual| > threshold
threshold = times_median * np.nanmedian(np.abs(residual_image))
indices_invalid = np.abs(residual_image) > threshold
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
indices_invalid = np.abs(residual_image) > threshold
mask_outliers = np.abs(residual_image) > threshold


# Remove outliers by setting them to NaN
scan_image_without_outliers = scan_image.data.copy()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not a scan_image anymore. And I was wondering whether we could make a update_mark_data-like function to cover all this boilerplate up?

scan_image_without_outliers[indices_invalid] = np.nan

return ScanImage(
data=scan_image_without_outliers,
scale_x=scan_image.scale_x,
scale_y=scan_image.scale_y,
)


def apply_median_filter(scan_image: ScanImage, filter_size: int) -> ScanImage:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a good idea to rewrite this implementation in a way that makes more clear what actually happens. From the code it seems we are applying a windowing function and compute the medians over the windows?

"""
Apply a fast median filter that handles NaN values.
This function computes a median filter by creating shifted versions of the input
image and taking the median across all shifts. NaN values are ignored during
the median calculation.
Notes
-----
This implementation uses circular shifts to create a 3D array of all
neighborhood pixels, then computes the median along the third dimension.
:param scan_image: Scan image to filter
:param filter_size: Size of the median filter kernel (will be made odd if even)
:return: Median-filtered scan image with the same shape as scan_image
"""
# Make sure the filter size is odd
if filter_size % 2 == 0:
filter_size = filter_size + 1

# Pad the matrix with a NaN border of half the filter size on all sides
pad_shape = (
scan_image.data.shape[0] + filter_size - 1,
scan_image.data.shape[1] + filter_size - 1,
)

input_image_border = np.ones(pad_shape) * np.nan
half_filter_size = (filter_size - 1) // 2
input_image_border[
half_filter_size:-half_filter_size, half_filter_size:-half_filter_size
] = scan_image.data

# Create 3D array to hold all shifted versions
input_image_array = np.ones((*pad_shape, filter_size**2), dtype=np.float32)

# Fill the array with circularly shifted versions
image_count = 0
for kernel_rows in range(-half_filter_size, half_filter_size + 1):
for kernel_columns in range(-half_filter_size, half_filter_size + 1):
input_image_array[:, :, image_count] = np.roll(
input_image_border, shift=(kernel_rows, kernel_columns), axis=(0, 1)
)
image_count += 1

# Remove borders and compute median
output_image_no_border = input_image_array[
half_filter_size:-half_filter_size, half_filter_size:-half_filter_size, :
]
output_image = np.nanmedian(output_image_no_border, axis=2)

return ScanImage(
data=output_image, scale_x=scan_image.scale_x, scale_y=scan_image.scale_y
)
2 changes: 1 addition & 1 deletion packages/scratch-core/src/conversion/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def resample_scan_image_and_mask(
:param mask: Corresponding mask array.
:param factors: The multipliers for the scale of the X- and Y-axis. The formula used is `new_scale = factor * old_scale`.
:param target_scale: Target scale (in meters) when `factors` are not provided.
:param preserve_aspect_ratio: Whether to preserve the aspect ratio of the image.
:param only_downsample: If True, only downsample data (default). If False, allow upsampling.
:param preserve_aspect_ratio: Whether to preserve the aspect ratio of the image.
:returns: Resampled ScanImage and MaskArray
"""
if not factors:
Expand Down
Loading
Loading