Skip to content

Commit af33ed1

Browse files
benjeffery, mergify[bot]
authored and committed
Add fixed-length arrays in metadata
1 parent 25a9994 commit af33ed1

File tree

5 files changed

+329
-18
lines changed

5 files changed

+329
-18
lines changed

docs/metadata.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,17 @@ types above. `L` is the default. As an example:
532532

533533
This will result in an array of 2-byte integers, prefixed by a single-byte array length.
534534

535+
For arrays with a known fixed size, you can specify the `length` property instead:
536+
```
537+
{"type": "array", "length": 3, "items": {"type":"number", "binaryFormat":"i"}}
538+
```
539+
This creates a fixed-length array of exactly 3 integers, without storing the array length in the encoded data.
540+
Fixed-length arrays are more space-efficient since they don't need to store the length prefix.
541+
542+
When using fixed-length arrays:
543+
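1. The `arrayLengthFormat` property must not be specified; a schema that combines it with `length` fails validation (as does setting `noLengthEncodingExhaustBuffer` to `true` together with `length`)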
544+
2. Arrays provided for encoding must match the specified length exactly; otherwise a `ValueError` is raised
545+
535546
For dealing with legacy encodings that do not store the
536547
length of the array, setting `noLengthEncodingExhaustBuffer` to `true` will read
537548
elements of the array until the metadata buffer is exhausted. As such an array

python/CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
- Fix to ``TreeSequence.pair_coalescence_rates`` causing an
1717
assertion to be triggered by floating point error, when all coalescence events are inside a single time window (:user:`natep`, :issue:`3035`, :pr:`3038`)
1818

19+
**Features**
20+
21+
- Add support for fixed-length arrays in metadata struct codec using the ``length`` property.
22+
(:user:`benjeffery`, :issue:`3088`, :pr:`3090`)
23+
1924
--------------------
2025
[0.6.0] - 2024-10-16
2126
--------------------

python/requirements/development.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies:
1717
- jsonschema>=3.0.0
1818
- jupyter-book>=0.12.1
1919
- kastore
20+
- lshmm>=0.0.8
2021
- matplotlib
2122
- meson>=0.61.0
2223
- msprime>=1.0.0
@@ -36,12 +37,14 @@ dependencies:
3637
- sphinx-argparse
3738
- sphinx-autodoc-typehints>=1.18.3
3839
- sphinx-issues
40+
- sphinx-jupyterbook-latex
41+
- sphinxcontrib-prettyspecialmethods
3942
- sphinx-book-theme
43+
- pydata_sphinx_theme>=0.7.2
4044
- svgwrite>=1.1.10
4145
- tqdm
4246
- tszip
4347
- pip:
44-
- lshmm>=0.0.8
4548
- newick
4649
- xmlunittest
4750
- msgpack>=1.0.0

python/tests/test_metadata.py

Lines changed: 228 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# MIT License
22
#
3-
# Copyright (c) 2018-2024 Tskit Developers
3+
# Copyright (c) 2018-2025 Tskit Developers
44
# Copyright (c) 2017 University of Oxford
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -1415,6 +1415,154 @@ def test_ordering_of_fields(self):
14151415
assert ms.validate_and_encode_row(row_data) == index_order_encoded
14161416
assert ms.decode_row(index_order_encoded) == row_data
14171417

1418+
def test_fixed_length_array(self):
1419+
schema = {
1420+
"codec": "struct",
1421+
"type": "object",
1422+
"properties": {
1423+
"array": {
1424+
"type": "array",
1425+
"length": 3,
1426+
"items": {"type": "number", "binaryFormat": "i"},
1427+
}
1428+
},
1429+
}
1430+
self.round_trip(schema, {"array": [1, 2, 3]})
1431+
1432+
# Test with complex fixed-length arrays
1433+
schema = {
1434+
"codec": "struct",
1435+
"type": "object",
1436+
"properties": {
1437+
"array": {
1438+
"type": "array",
1439+
"length": 2,
1440+
"items": {
1441+
"type": "object",
1442+
"properties": {
1443+
"int": {"type": "number", "binaryFormat": "i"},
1444+
"float": {"type": "number", "binaryFormat": "d"},
1445+
},
1446+
},
1447+
}
1448+
},
1449+
}
1450+
self.round_trip(
1451+
schema, {"array": [{"int": 1, "float": 1.1}, {"int": 2, "float": 2.2}]}
1452+
)
1453+
1454+
# Test fixed-length nested arrays
1455+
schema = {
1456+
"codec": "struct",
1457+
"type": "object",
1458+
"properties": {
1459+
"array": {
1460+
"type": "array",
1461+
"length": 2,
1462+
"items": {
1463+
"type": "array",
1464+
"length": 3,
1465+
"items": {"type": "number", "binaryFormat": "d"},
1466+
},
1467+
}
1468+
},
1469+
}
1470+
self.round_trip(schema, {"array": [[1.1, 1.2, 1.3], [2.1, 2.2, 2.3]]})
1471+
1472+
def test_mixed_fixed_and_variable_arrays(self):
1473+
schema = {
1474+
"codec": "struct",
1475+
"type": "object",
1476+
"properties": {
1477+
"fixed_array": {
1478+
"type": "array",
1479+
"length": 3,
1480+
"items": {"type": "number", "binaryFormat": "i"},
1481+
},
1482+
"variable_array": {
1483+
"type": "array",
1484+
"items": {"type": "number", "binaryFormat": "i"},
1485+
},
1486+
},
1487+
}
1488+
self.round_trip(
1489+
schema, {"fixed_array": [1, 2, 3], "variable_array": [4, 5, 6, 7]}
1490+
)
1491+
self.round_trip(schema, {"fixed_array": [1, 2, 3], "variable_array": []})
1492+
1493+
# Nested case - array of objects where each object has
1494+
# both fixed and variable-length arrays
1495+
schema = {
1496+
"codec": "struct",
1497+
"type": "object",
1498+
"properties": {
1499+
"objects": {
1500+
"type": "array",
1501+
"items": {
1502+
"type": "object",
1503+
"properties": {
1504+
"fixed": {
1505+
"type": "array",
1506+
"length": 2,
1507+
"items": {"type": "number", "binaryFormat": "d"},
1508+
},
1509+
"variable": {
1510+
"type": "array",
1511+
"items": {"type": "number", "binaryFormat": "i"},
1512+
},
1513+
},
1514+
},
1515+
}
1516+
},
1517+
}
1518+
self.round_trip(
1519+
schema,
1520+
{
1521+
"objects": [
1522+
{"fixed": [1.1, 2.2], "variable": [1, 2, 3]},
1523+
{"fixed": [3.3, 4.4], "variable": [4]},
1524+
{"fixed": [5.5, 6.6], "variable": []},
1525+
]
1526+
},
1527+
)
1528+
1529+
def test_edge_case_zero_length_array(self):
1530+
schema = {
1531+
"codec": "struct",
1532+
"type": "object",
1533+
"properties": {
1534+
"empty_fixed": {
1535+
"type": "array",
1536+
"length": 0,
1537+
"items": {"type": "number", "binaryFormat": "i"},
1538+
}
1539+
},
1540+
}
1541+
self.round_trip(schema, {"empty_fixed": []})
1542+
1543+
# Can't provide non-empty array when length=0
1544+
ms = metadata.MetadataSchema(schema)
1545+
with pytest.raises(
1546+
ValueError, match="Array length 1 does not match schema fixed length 0"
1547+
):
1548+
ms.validate_and_encode_row({"empty_fixed": [1]})
1549+
1550+
# Complex object with zero-length array
1551+
schema = {
1552+
"codec": "struct",
1553+
"type": "object",
1554+
"properties": {
1555+
"name": {"type": "string", "binaryFormat": "10p"},
1556+
"empty_fixed": {
1557+
"type": "array",
1558+
"length": 0,
1559+
"items": {"type": "number", "binaryFormat": "i"},
1560+
},
1561+
"value": {"type": "number", "binaryFormat": "d"},
1562+
},
1563+
}
1564+
self.round_trip(schema, {"name": "test", "empty_fixed": [], "value": 42.0})
1565+
14181566

14191567
class TestStructCodecErrors:
14201568
def encode(self, schema, row_data):
@@ -1644,6 +1792,85 @@ def test_no_default_implies_required(self):
16441792
):
16451793
self.encode(schema, {})
16461794

1795+
def test_fixed_length_array_wrong_length(self):
1796+
schema = {
1797+
"codec": "struct",
1798+
"type": "object",
1799+
"properties": {
1800+
"array": {
1801+
"type": "array",
1802+
"length": 3,
1803+
"items": {"type": "number", "binaryFormat": "i"},
1804+
},
1805+
},
1806+
}
1807+
ms = metadata.MetadataSchema(schema)
1808+
1809+
with pytest.raises(
1810+
ValueError, match="Array length 2 does not match schema fixed length 3"
1811+
):
1812+
ms.validate_and_encode_row({"array": [1, 2]})
1813+
1814+
with pytest.raises(
1815+
ValueError, match="Array length 4 does not match schema fixed length 3"
1816+
):
1817+
ms.validate_and_encode_row({"array": [1, 2, 3, 4]})
1818+
1819+
def test_fixed_length_array_conflicts(self):
1820+
schema = {
1821+
"codec": "struct",
1822+
"type": "object",
1823+
"properties": {
1824+
"test": {
1825+
"type": "array",
1826+
"length": 3,
1827+
"noLengthEncodingExhaustBuffer": True,
1828+
"items": {"type": "number", "binaryFormat": "i"},
1829+
},
1830+
},
1831+
}
1832+
with pytest.raises(
1833+
exceptions.MetadataSchemaValidationError,
1834+
match="test array cannot have both 'length' and "
1835+
"'noLengthEncodingExhaustBuffer' set",
1836+
):
1837+
metadata.MetadataSchema(schema)
1838+
1839+
def test_fixed_length_with_length_format(self):
1840+
schema = {
1841+
"codec": "struct",
1842+
"type": "object",
1843+
"properties": {
1844+
"array": {
1845+
"type": "array",
1846+
"length": 3,
1847+
"arrayLengthFormat": "B",
1848+
"items": {"type": "number", "binaryFormat": "i"},
1849+
},
1850+
},
1851+
}
1852+
with pytest.raises(
1853+
exceptions.MetadataSchemaValidationError,
1854+
match="fixed-length array should not specify 'arrayLengthFormat'",
1855+
):
1856+
metadata.MetadataSchema(schema)
1857+
1858+
def test_negative_fixed_length(self):
1859+
"""Test that negative fixed-length values are rejected."""
1860+
schema = {
1861+
"codec": "struct",
1862+
"type": "object",
1863+
"properties": {
1864+
"array": {
1865+
"type": "array",
1866+
"length": -5,
1867+
"items": {"type": "number", "binaryFormat": "i"},
1868+
},
1869+
},
1870+
}
1871+
with pytest.raises(exceptions.MetadataSchemaValidationError):
1872+
metadata.MetadataSchema(schema)
1873+
16471874

16481875
class TestSLiMDecoding:
16491876
"""

0 commit comments

Comments
 (0)