From f12b21c311a4fde8e65df85e6af64f329ecca3fd Mon Sep 17 00:00:00 2001 From: Arjun Mehta Date: Mon, 20 Jan 2025 23:02:36 +0530 Subject: [PATCH 1/5] feat: validate undeclared SigMF extensions in metadata --- sigmf/validate.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/sigmf/validate.py b/sigmf/validate.py index 0694332..cb9094a 100644 --- a/sigmf/validate.py +++ b/sigmf/validate.py @@ -25,6 +25,34 @@ from . import error, schema, sigmffile +def _get_extension_namespaces(metadata): + """Get set of declared extension namespaces from global core:extensions.""" + extensions = metadata.get("global", {}).get("core:extensions", []) + return {ext["name"].split(":")[0] for ext in extensions} + +def _get_used_extensions(metadata): + """Find all extension namespaces actually used in the metadata.""" + used = set() + + # Helper function to check keys in a dictionary + def check_dict(d): + for key in d: + if ":" in key: + namespace = key.split(":")[0] + if namespace != "core": + used.add(namespace) + + # Check all sections + for section in ["global", "captures", "annotations"]: + if section in metadata: + if isinstance(metadata[section], dict): + check_dict(metadata[section]) + elif isinstance(metadata[section], list): + for item in metadata[section]: + check_dict(item) + + return used + def validate(metadata, ref_schema=schema.get_schema()) -> None: """ Check that the provided `metadata` dict is valid according to the `ref_schema` dict. @@ -46,6 +74,17 @@ def validate(metadata, ref_schema=schema.get_schema()) -> None: """ jsonschema.validators.validate(instance=metadata, schema=ref_schema) + # Check extensions + declared_extensions = _get_extension_namespaces(metadata) + used_extensions = _get_used_extensions(metadata) + + undeclared = used_extensions - declared_extensions + if undeclared: + raise jsonschema.exceptions.ValidationError( + f"Found undeclared extensions in use: {', '.join(sorted(undeclared))}. " + "All extensions must be declared in global:core:extensions." + ) + # ensure captures and annotations have monotonically increasing sample_start for key in ["captures", "annotations"]: count = -1 From 772a75192a78eb6acda17e77ab22238f31986af9 Mon Sep 17 00:00:00 2001 From: Teque5 Date: Sat, 20 Sep 2025 22:27:54 -0700 Subject: [PATCH 2/5] raise warning instead of ValidationError; add test --- sigmf/validate.py | 26 ++++++++++++++++---------- tests/test_validation.py | 24 ++++++++++++++++++++---- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/sigmf/validate.py b/sigmf/validate.py index cb9094a..c4ea344 100644 --- a/sigmf/validate.py +++ b/sigmf/validate.py @@ -11,6 +11,7 @@ import logging import os import sys +import warnings # multi-threading library - should work well as I/O will be the primary # cost for small SigMF files. Swap to ProcessPool if files are large. @@ -30,18 +31,19 @@ def _get_extension_namespaces(metadata): extensions = metadata.get("global", {}).get("core:extensions", []) return {ext["name"].split(":")[0] for ext in extensions} + def _get_used_extensions(metadata): """Find all extension namespaces actually used in the metadata.""" used = set() - - # Helper function to check keys in a dictionary - def check_dict(d): - for key in d: + + def check_dict(ddd): + """Recursively check keys in a dictionary for namespaces.""" + for key in ddd: if ":" in key: namespace = key.split(":")[0] if namespace != "core": used.add(namespace) - + # Check all sections for section in ["global", "captures", "annotations"]: if section in metadata: @@ -50,9 +52,10 @@ def check_dict(d): elif isinstance(metadata[section], list): for item in metadata[section]: check_dict(item) - + return used + def validate(metadata, ref_schema=schema.get_schema()) -> None: """ Check that the provided `metadata` dict is valid according to the `ref_schema` dict. @@ -74,15 +77,18 @@ def validate(metadata, ref_schema=schema.get_schema()) -> None: """ jsonschema.validators.validate(instance=metadata, schema=ref_schema) - # Check extensions + # Check for namespace extensions declared_extensions = _get_extension_namespaces(metadata) used_extensions = _get_used_extensions(metadata) - + undeclared = used_extensions - declared_extensions if undeclared: - raise jsonschema.exceptions.ValidationError( + warnings.warn( f"Found undeclared extensions in use: {', '.join(sorted(undeclared))}. " - "All extensions must be declared in global:core:extensions." + "All extensions should be declared in core:extensions. " + "This will raise a ValidationError in future versions.", + DeprecationWarning, + stacklevel=2, ) # ensure captures and annotations have monotonically increasing sample_start diff --git a/tests/test_validation.py b/tests/test_validation.py index 05720b9..37a56bf 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -6,6 +6,7 @@ """Tests for Validator""" +import copy import tempfile import unittest from pathlib import Path @@ -18,9 +19,12 @@ from .testdata import TEST_FLOAT32_DATA, TEST_METADATA -def test_valid_data(): - """ensure the default metadata is OK""" - SigMFFile(TEST_METADATA).validate() +class NominalCases(unittest.TestCase): + """Cases where the validator should succeed.""" + + def test_nominal(self): + """nominal case should pass""" + SigMFFile(TEST_METADATA).validate() class CommandLineValidator(unittest.TestCase): @@ -75,7 +79,7 @@ class FailingCases(unittest.TestCase): """Cases where the validator should raise an exception.""" def setUp(self): - self.metadata = dict(TEST_METADATA) + self.metadata = copy.deepcopy(TEST_METADATA) def test_no_version(self): """core:version must be present""" @@ -128,3 +132,15 @@ def test_invalid_hash(self): self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.HASH_KEY] = "derp" with self.assertRaises(sigmf.error.SigMFFileError): SigMFFile(metadata=self.metadata, data_file=temp_file.name) + +class CheckNamespace(unittest.TestCase): + """Cases where namespace issues are involved""" + + def setUp(self): + self.metadata = copy.deepcopy(TEST_METADATA) + + def test_raises_warning(self): + """unknown namespace should raise a warning""" + self.metadata["global"]["other_namespace:key"] = 0 + with self.assertWarns(Warning): + SigMFFile(self.metadata).validate() From 35ec11018c7faedc7e56fdf8f5b3f39eb12e680b Mon Sep 17 00:00:00 2001 From: Teque5 Date: Sat, 20 Sep 2025 22:58:47 -0700 Subject: [PATCH 3/5] add test case for declared extension --- tests/test_validation.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/test_validation.py b/tests/test_validation.py index 37a56bf..7a68ad8 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -24,7 +24,7 @@ class NominalCases(unittest.TestCase): def test_nominal(self): """nominal case should pass""" - SigMFFile(TEST_METADATA).validate() + SigMFFile(copy.deepcopy(TEST_METADATA)).validate() class CommandLineValidator(unittest.TestCase): @@ -36,7 +36,7 @@ def setUp(self): self.tmp_path = tmp_path = Path(self.tmp_dir.name) junk_path = tmp_path / "junk" TEST_FLOAT32_DATA.tofile(junk_path) - some_meta = SigMFFile(TEST_METADATA, data_file=junk_path) + some_meta = SigMFFile(copy.deepcopy(TEST_METADATA), data_file=junk_path) some_meta.tofile(tmp_path / "a") some_meta.tofile(tmp_path / "b") some_meta.tofile(tmp_path / "c", toarchive=True) @@ -139,8 +139,19 @@ class CheckNamespace(unittest.TestCase): def setUp(self): self.metadata = copy.deepcopy(TEST_METADATA) - def test_raises_warning(self): + def test_undeclared_namespace(self): """unknown namespace should raise a warning""" - self.metadata["global"]["other_namespace:key"] = 0 + self.metadata[SigMFFile.GLOBAL_KEY]["other_namespace:key"] = 0 with self.assertWarns(Warning): SigMFFile(self.metadata).validate() + + def test_undeclared_namespace(self): + """known namespace should not raise a warning""" + self.metadata[SigMFFile.GLOBAL_KEY]["other_namespace:key"] = 0 + # define other_namespace + self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.EXTENSIONS_KEY] = [{ + "name": "other_namespace", + "version": "0.0.1", + "optional": True, + }] + SigMFFile(self.metadata).validate() From 3ccab511875766f2945d61c07e2308685eb203ff Mon Sep 17 00:00:00 2001 From: Teque5 Date: Sun, 21 Sep 2025 10:05:07 -0700 Subject: [PATCH 4/5] add another test with declared namespace --- sigmf/sigmffile.py | 1 - sigmf/validate.py | 2 +- tests/test_validation.py | 22 +++++++++++++--------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 37e6fe0..1203c53 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -604,7 +604,6 @@ def validate(self): """ Check schema and throw error if issue. """ - version = self.get_global_field(self.VERSION_KEY) validate.validate(self._metadata, self.get_schema()) def archive(self, name=None, fileobj=None): diff --git a/sigmf/validate.py b/sigmf/validate.py index c4ea344..9cb02d0 100644 --- a/sigmf/validate.py +++ b/sigmf/validate.py @@ -166,7 +166,7 @@ def main(arg_tuple: Optional[Tuple[str, ...]] = None) -> None: n_total = len(paths) # estimate number of CPU cores # https://stackoverflow.com/questions/1006289/how-to-find-out-the-number-of-cpus-using-python - est_num_workers = len(os.sched_getaffinity(0)) if os.name == 'posix' else os.cpu_count() + est_num_workers = len(os.sched_getaffinity(0)) if os.name == "posix" else os.cpu_count() # create a thread pool # https://docs.python.org/3.7/library/concurrent.futures.html#threadpoolexecutor with ThreadPoolExecutor(max_workers=est_num_workers) as executor: diff --git a/tests/test_validation.py b/tests/test_validation.py index 7a68ad8..eaff000 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -82,10 +82,11 @@ def setUp(self): self.metadata = copy.deepcopy(TEST_METADATA) def test_no_version(self): - """core:version must be present""" - del self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.VERSION_KEY] + """version key must be present""" + meta = SigMFFile(copy.deepcopy(self.metadata)) + del meta._metadata[SigMFFile.GLOBAL_KEY][SigMFFile.VERSION_KEY] with self.assertRaises(ValidationError): - SigMFFile(self.metadata).validate() + meta.validate() def test_extra_top_level_key(self): """no extra keys allowed on the top level""" @@ -133,6 +134,7 @@ def test_invalid_hash(self): with self.assertRaises(sigmf.error.SigMFFileError): SigMFFile(metadata=self.metadata, data_file=temp_file.name) + class CheckNamespace(unittest.TestCase): """Cases where namespace issues are involved""" @@ -145,13 +147,15 @@ def test_undeclared_namespace(self): with self.assertWarns(Warning): SigMFFile(self.metadata).validate() - def test_undeclared_namespace(self): + def test_declared_namespace(self): """known namespace should not raise a warning""" self.metadata[SigMFFile.GLOBAL_KEY]["other_namespace:key"] = 0 # define other_namespace - self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.EXTENSIONS_KEY] = [{ - "name": "other_namespace", - "version": "0.0.1", - "optional": True, - }] + self.metadata[SigMFFile.GLOBAL_KEY][SigMFFile.EXTENSIONS_KEY] = [ + { + "name": "other_namespace", + "version": "0.0.1", + "optional": False, + } + ] SigMFFile(self.metadata).validate() From ba86696751762cb69db301d665e93766e32bcd1e Mon Sep 17 00:00:00 2001 From: Teque5 Date: Sun, 21 Sep 2025 10:24:18 -0700 Subject: [PATCH 5/5] polish and increment patch version --- sigmf/__init__.py | 2 +- sigmf/validate.py | 39 +++++++++++++++++---------------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/sigmf/__init__.py b/sigmf/__init__.py index 4e51d46..273683e 100644 --- a/sigmf/__init__.py +++ b/sigmf/__init__.py @@ -5,7 +5,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later # version of this python module -__version__ = "1.2.10" +__version__ = "1.2.11" # matching version of the SigMF specification __specification__ = "1.2.5" diff --git a/sigmf/validate.py b/sigmf/validate.py index 9cb02d0..0b5cc84 100644 --- a/sigmf/validate.py +++ b/sigmf/validate.py @@ -26,32 +26,30 @@ from . import error, schema, sigmffile -def _get_extension_namespaces(metadata): - """Get set of declared extension namespaces from global core:extensions.""" - extensions = metadata.get("global", {}).get("core:extensions", []) +def _get_namespaces_declared(metadata: dict) -> set: + """Get set of declared extension namespaces.""" + extensions = metadata.get("global", {}).get(sigmffile.SigMFFile.EXTENSIONS_KEY, []) return {ext["name"].split(":")[0] for ext in extensions} -def _get_used_extensions(metadata): - """Find all extension namespaces actually used in the metadata.""" +def _get_namespaces_used(metadata: dict) -> set: + """Get set of used extension namespaces.""" used = set() - def check_dict(ddd): - """Recursively check keys in a dictionary for namespaces.""" + def check_dict(ddd: dict): + """Check keys for non-core namespaces.""" for key in ddd: if ":" in key: namespace = key.split(":")[0] if namespace != "core": used.add(namespace) - # Check all sections - for section in ["global", "captures", "annotations"]: - if section in metadata: - if isinstance(metadata[section], dict): - check_dict(metadata[section]) - elif isinstance(metadata[section], list): - for item in metadata[section]: - check_dict(item) + for section in metadata: + if isinstance(metadata[section], dict): + check_dict(metadata[section]) + elif isinstance(metadata[section], list): + for item in metadata[section]: + check_dict(item) return used @@ -77,15 +75,12 @@ def validate(metadata, ref_schema=schema.get_schema()) -> None: """ jsonschema.validators.validate(instance=metadata, schema=ref_schema) - # Check for namespace extensions - declared_extensions = _get_extension_namespaces(metadata) - used_extensions = _get_used_extensions(metadata) - - undeclared = used_extensions - declared_extensions + # check namespaces + undeclared = _get_namespaces_used(metadata) - _get_namespaces_declared(metadata) if undeclared: warnings.warn( f"Found undeclared extensions in use: {', '.join(sorted(undeclared))}. " - "All extensions should be declared in core:extensions. " + f"All extensions should be declared in {sigmffile.SigMFFile.EXTENSIONS_KEY}. " "This will raise a ValidationError in future versions.", DeprecationWarning, stacklevel=2, @@ -95,7 +90,7 @@ def validate(metadata, ref_schema=schema.get_schema()) -> None: for key in ["captures", "annotations"]: count = -1 for item in metadata[key]: - new_count = item["core:sample_start"] + new_count = item[sigmffile.SigMFFile.START_INDEX_KEY] if new_count < count: raise jsonschema.exceptions.ValidationError(f"{key} has incorrect sample start ordering.") count = new_count