Skip to content

Commit 8c4cece

Browse files
committed
unify sample access via constructor autoscale parameter
* Make slicing equivalent to reading for integer types, e.g. meta[0:10] == meta.read_samples(count=10)
* Breaking API change: autoscale and raw_components parameters removed from read methods
* Move autoscale configuration from method parameters to the SigMFFile constructor
* Remove the already-deprecated raw_components parameter from all methods
* Update read_samples() and read_samples_in_capture() to use the instance autoscale setting
* Add autoscale support to fromfile(), fromarchive(), and SigMFArchiveReader
* Simplify __getitem__ with unified scaling behavior for consistency
* Increment minor version
1 parent dc7f7c5 commit 8c4cece

File tree

6 files changed

+136
-72
lines changed

6 files changed

+136
-72
lines changed

docs/source/quickstart.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,24 @@ Save a Numpy array as a SigMF Recording
8080
8181
# check for mistakes & write to disk
8282
meta.tofile('example_cf32.sigmf-meta') # extension is optional
83+
84+
--------------------------------
85+
Control Fixed-Point Data Scaling
86+
--------------------------------
87+
88+
For fixed-point datasets, you can control whether samples are automatically scaled to floating-point values:
89+
90+
.. code-block:: python
91+
92+
import sigmf
93+
94+
# Default behavior: autoscale fixed-point data to [-1.0, 1.0] range
95+
handle = sigmf.fromfile("fixed_point_data.sigmf")
96+
samples = handle.read_samples() # Returns float32/complex64
97+
98+
# Disable autoscaling to access raw integer values
99+
handle_raw = sigmf.fromfile("fixed_point_data.sigmf", autoscale=False)
100+
raw_samples = handle_raw.read_samples() # Returns original integer types
101+
102+
# Both slicing and read_samples() respect the autoscale setting
103+
assert handle[0:10].dtype == handle.read_samples(count=10).dtype

sigmf/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# SPDX-License-Identifier: LGPL-3.0-or-later
66

77
# version of this python module
8-
__version__ = "1.2.14"
8+
__version__ = "1.3.0"
99
# matching version of the SigMF specification
1010
__specification__ = "1.2.5"
1111

sigmf/archivereader.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ class SigMFArchiveReader:
2929
map_readonly : bool, optional
3030
Indicate whether assignments on the numpy.memmap are allowed.
3131
archive_buffer : buffer, optional
32-
32+
Alternative buffer to read archive from.
33+
autoscale : bool, optional
34+
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).
3335
3436
Raises
3537
------
@@ -41,7 +43,7 @@ class SigMFArchiveReader:
4143
If metadata is invalid.
4244
"""
4345

44-
def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
46+
def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None, autoscale=True):
4547
if name is not None:
4648
path = Path(name)
4749
if path.suffix != SIGMF_ARCHIVE_EXT:
@@ -90,7 +92,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
9092
if data_offset is None:
9193
raise SigMFFileError("No .sigmf-data file found in archive!")
9294

93-
self.sigmffile = SigMFFile(metadata=json_contents)
95+
self.sigmffile = SigMFFile(metadata=json_contents, autoscale=autoscale)
9496
self.sigmffile.validate()
9597

9698
self.sigmffile.set_data_file(

sigmf/sigmffile.py

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ class SigMFFile(SigMFMetafile):
158158
]
159159
VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS}
160160

161-
def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True):
161+
def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True, autoscale=True):
162162
"""
163163
API for SigMF I/O
164164
@@ -174,13 +174,17 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu
174174
When True will skip calculating hash on data_file (if present) to check against metadata.
175175
map_readonly: bool, default True
176176
Indicates whether assignments on the numpy.memmap are allowed.
177+
autoscale: bool, default True
178+
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0)
179+
for all sample reading operations including slicing.
177180
"""
178181
super().__init__()
179182
self.data_file = None
180183
self.data_buffer = None
181184
self.sample_count = 0
182185
self._memmap = None
183186
self.is_complex_data = False # numpy.iscomplexobj(self._memmap) is not adequate for fixed-point complex case
187+
self.autoscale = autoscale
184188

185189
self.set_metadata(metadata)
186190
if global_info is not None:
@@ -217,10 +221,39 @@ def __next__(self):
217221
def __getitem__(self, sli):
218222
mem = self._memmap[sli] # matches behavior of numpy.ndarray.__getitem__()
219223

224+
# original behavior: always apply _return_type conversion if set
220225
if self._return_type is None:
221-
return mem
222-
223-
# is_fixed_point and is_complex
226+
# no special conversion needed
227+
if not self.autoscale:
228+
return mem
229+
else:
230+
# apply autoscaling for fixed-point data when autoscale=True
231+
dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY))
232+
is_fixedpoint_data = dtype["is_fixedpoint"]
233+
234+
if is_fixedpoint_data:
235+
# apply scaling for fixed-point data
236+
is_unsigned_data = dtype["is_unsigned"]
237+
component_size = dtype["component_size"]
238+
data_type_out = np.dtype("f4") if not self.is_complex_data else np.dtype("f4, f4")
239+
240+
data = mem.astype(data_type_out)
241+
data = data.view(np.dtype("f4"))
242+
if is_unsigned_data:
243+
data -= 2 ** (component_size * 8 - 1)
244+
data *= 2 ** -(component_size * 8 - 1)
245+
data = data.view(data_type_out)
246+
if self.is_complex_data:
247+
data = data.view(np.complex64)
248+
# for single-channel complex data, flatten the last dimension
249+
if data.ndim > 1 and self.get_num_channels() == 1:
250+
data = data.flatten()
251+
return data[0] if isinstance(sli, int) else data
252+
else:
253+
# floating-point data, no scaling needed
254+
return mem
255+
256+
# handle complex data type conversion (original behavior)
224257
if self._memmap.ndim == 2:
225258
# num_channels == 1
226259
ray = mem[:, 0].astype(self._return_type) + 1.0j * mem[:, 1].astype(self._return_type)
@@ -511,9 +544,7 @@ def _count_samples(self):
511544
"It may be invalid data."
512545
)
513546
if self._get_sample_count_from_annotations() > sample_count:
514-
warnings.warn(
515-
f"Data source ends before the final annotation in the corresponding SigMF metadata."
516-
)
547+
warnings.warn(f"Data source ends before the final annotation in the corresponding SigMF metadata.")
517548
self.sample_count = sample_count
518549
return sample_count
519550

@@ -659,7 +690,7 @@ def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False):
659690
self.dump(fp, pretty=pretty)
660691
fp.write("\n") # text files should end in carriage return
661692

662-
def read_samples_in_capture(self, index=0, autoscale=True):
693+
def read_samples_in_capture(self, index=0):
663694
"""
664695
Reads samples from the specified captures segment in its entirety.
665696
@@ -682,9 +713,9 @@ def read_samples_in_capture(self, index=0, autoscale=True):
682713
"an integer number of samples across channels. It may be invalid."
683714
)
684715

685-
return self._read_datafile(cb[0], (cb[1] - cb[0]) // self.get_sample_size(), autoscale, False)
716+
return self._read_datafile(cb[0], (cb[1] - cb[0]) // self.get_sample_size())
686717

687-
def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=False):
718+
def read_samples(self, start_index=0, count=-1):
688719
"""
689720
Reads the specified number of samples starting at the specified index from the associated data file.
690721
@@ -694,16 +725,12 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F
694725
Starting sample index from which to read.
695726
count : int, default -1
696727
Number of samples to read. -1 will read whole file.
697-
autoscale : bool, default True
698-
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0)
699-
raw_components : bool, default False
700-
If True read and return the sample components (individual I & Q for complex, samples for real)
701-
with no conversions or interleaved channels.
702728
703729
Returns
704730
-------
705731
data : ndarray
706732
Samples are returned as an array of float or complex, with number of dimensions equal to NUM_CHANNELS_KEY.
733+
Scaling behavior depends on the autoscale parameter set during construction.
707734
"""
708735
if count == 0:
709736
raise IOError("Number of samples must be greater than zero, or -1 for all samples.")
@@ -719,9 +746,9 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F
719746

720747
if not self._is_conforming_dataset():
721748
warnings.warn(f"Recording dataset appears non-compliant, resulting data may be erroneous")
722-
return self._read_datafile(first_byte, count * self.get_num_channels(), autoscale, False)
749+
return self._read_datafile(first_byte, count * self.get_num_channels())
723750

724-
def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
751+
def _read_datafile(self, first_byte, nitems):
725752
"""
726753
internal function for reading samples from datafile
727754
"""
@@ -751,18 +778,15 @@ def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
751778
# return reshaped view for num_channels
752779
# first dimension will be double size if `is_complex_data`
753780
data = data.reshape(data.shape[0] // num_channels, num_channels)
754-
if not raw_components:
755-
data = data.astype(data_type_out)
756-
if autoscale and is_fixedpoint_data:
757-
data = data.view(np.dtype("f4"))
758-
if is_unsigned_data:
759-
data -= 2 ** (component_size * 8 - 1)
760-
data *= 2 ** -(component_size * 8 - 1)
761-
data = data.view(data_type_out)
762-
if self.is_complex_data:
763-
data = data.view(np.complex64)
764-
else:
765-
data = data.view(component_type_in)
781+
data = data.astype(data_type_out)
782+
if self.autoscale and is_fixedpoint_data:
783+
data = data.view(np.dtype("f4"))
784+
if is_unsigned_data:
785+
data -= 2 ** (component_size * 8 - 1)
786+
data *= 2 ** -(component_size * 8 - 1)
787+
data = data.view(data_type_out)
788+
if self.is_complex_data:
789+
data = data.view(np.complex64)
766790

767791
if self.data_file is not None:
768792
fp.close()
@@ -1061,17 +1085,34 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None):
10611085
return None
10621086

10631087

1064-
def fromarchive(archive_path, dir=None, skip_checksum=False):
1088+
def fromarchive(archive_path, dir=None, skip_checksum=False, autoscale=True):
10651089
"""Extract an archive and return a SigMFFile.
10661090
10671091
The `dir` parameter is no longer used as this function has been changed to
10681092
access SigMF archives without extracting them.
1093+
1094+
Parameters
1095+
----------
1096+
archive_path: str
1097+
Path to `sigmf-archive` tarball.
1098+
dir: str, optional
1099+
No longer used. Kept for compatibility.
1100+
skip_checksum: bool, default False
1101+
Skip dataset checksum calculation.
1102+
autoscale: bool, default True
1103+
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).
1104+
1105+
Returns
1106+
-------
1107+
SigMFFile
1108+
Instance created from archive.
10691109
"""
10701110
from .archivereader import SigMFArchiveReader
1071-
return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum).sigmffile
1111+
1112+
return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum, autoscale=autoscale).sigmffile
10721113

10731114

1074-
def fromfile(filename, skip_checksum=False):
1115+
def fromfile(filename, skip_checksum=False, autoscale=True):
10751116
"""
10761117
Creates and returns a SigMFFile or SigMFCollection instance with metadata loaded from the specified file.
10771118
@@ -1087,6 +1128,8 @@ def fromfile(filename, skip_checksum=False):
10871128
Path for SigMF Metadata, Dataset, Archive or Collection (with or without extension).
10881129
skip_checksum: bool, default False
10891130
When True will not read entire dataset to calculate hash.
1131+
autoscale: bool, default True
1132+
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).
10901133
10911134
Returns
10921135
-------
@@ -1103,7 +1146,7 @@ def fromfile(filename, skip_checksum=False):
11031146
ext = file_path.suffix
11041147

11051148
if (ext.lower().endswith(SIGMF_ARCHIVE_EXT) or not Path.is_file(meta_fn)) and Path.is_file(archive_fn):
1106-
return fromarchive(archive_fn, skip_checksum=skip_checksum)
1149+
return fromarchive(archive_fn, skip_checksum=skip_checksum, autoscale=autoscale)
11071150

11081151
if (ext.lower().endswith(SIGMF_COLLECTION_EXT) or not Path.is_file(meta_fn)) and Path.is_file(collection_fn):
11091152
collection_fp = open(collection_fn, "rb")
@@ -1123,7 +1166,7 @@ def fromfile(filename, skip_checksum=False):
11231166
meta_fp.close()
11241167

11251168
data_fn = get_dataset_filename_from_metadata(meta_fn, metadata)
1126-
return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum)
1169+
return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum, autoscale=autoscale)
11271170

11281171

11291172
def get_sigmf_filenames(filename):

tests/test_archivereader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def test_access_data_without_untar(self):
6060
if complex_prefix == "c":
6161
# complex data will be half as long
6262
target_count //= 2
63-
self.assertTrue(np.all(np.iscomplex(readback_samples)))
63+
self.assertTrue(np.iscomplexobj(readback_samples))
6464
if num_channels != 1:
6565
# check expected # of channels
6666
self.assertEqual(

0 commit comments

Comments
 (0)