Skip to content

Commit 370022c

Browse files
committed
Show metadata size
Fixes #2637
1 parent 28c9e48 commit 370022c

File tree

4 files changed

+60
-15
lines changed

4 files changed

+60
-15
lines changed

python/CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
ID is e.g. a population name, rather than silently returning no samples.
99
(:user:`hyanwong`, :pr:`3344`)
1010

11+
**Features**
12+
13+
- Displaying a summary of the tree sequence now shows the metadata codec and
14+
size of the metadata for each table. (:user:`hyanwong`, :pr:`3343`, :issue:`2637`)
15+
1116
--------------------
1217
[1.0.0] - 2025-11-27
1318
--------------------

python/tests/test_highlevel.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,8 +1991,25 @@ def test_html_repr(self, ts):
19911991
assert len(html) > 5000
19921992
assert f"<tr><td>Trees</td><td>{ts.num_trees:,}</td></tr>" in html
19931993
assert f"<tr><td>Time Units</td><td>{ts.time_units}</td></tr>" in html
1994-
for table in ts.tables.table_name_map:
1995-
assert f"<td>{table.capitalize()}</td>" in html
1994+
codecs = collections.defaultdict(int)
1995+
for table_name, table in ts.tables.table_name_map.items():
1996+
assert f"<td>{table_name.capitalize()}</td>" in html
1997+
if hasattr(table, "metadata_schema"):
1998+
schema = table.metadata_schema.schema
1999+
codec = schema["codec"] if schema else "raw"
2000+
codecs[codec] += 1
2001+
assert "<td>Metadata</td>" in html
2002+
assert "<th>Metadata</th>" in html
2003+
assert "<th>Metadata size</th>" in html
2004+
num_tables_with_metadata = 0
2005+
for codec, count in codecs.items():
2006+
assert html.count(f">{codec}</td>") == count
2007+
num_tables_with_metadata += count
2008+
# Only one table (provenances) has no metadata
2009+
assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1
2010+
# All metadata tables should show the percentage metadata size
2011+
assert html.count("%)</td>") == num_tables_with_metadata
2012+
19962013
if ts.num_provenances > 0:
19972014
assert (
19982015
f"<td>{json.loads(ts.provenance(0).record)['software']['name']}</td>"
@@ -2027,8 +2044,21 @@ def test_str(self, ts):
20272044
assert len(s) > 999
20282045
assert re.search(rf"║Trees *│ *{ts.num_trees}║", s)
20292046
assert re.search(rf"║Time Units *│ *{ts.time_units}║", s)
2030-
for table in ts.tables.table_name_map:
2031-
assert re.search(rf"║{table.capitalize()} *│", s)
2047+
codecs = collections.defaultdict(int)
2048+
for table_name, table in ts.tables.table_name_map.items():
2049+
assert re.search(rf"║{table_name.capitalize()} *│", s)
2050+
if hasattr(table, "metadata_schema"):
2051+
schema = table.metadata_schema.schema
2052+
codec = schema["codec"] if schema else "raw"
2053+
codecs[codec] += 1
2054+
num_tables_with_metadata = 0
2055+
for codec, count in codecs.items():
2056+
assert s.count(codec) == count
2057+
num_tables_with_metadata += count
2058+
# Only one table (provenances) has no metadata
2059+
assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1
2060+
# All metadata tables should show the percentage metadata size
2061+
assert s.count("%)") == num_tables_with_metadata
20322062

20332063
@pytest.mark.skip("FIXME nbytes")
20342064
def test_nbytes(self, tmp_path, ts_fixture):

python/tskit/trees.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4476,19 +4476,16 @@ def __str__(self):
44764476
["Sample Nodes", util.format_number(self.num_samples, sep=",")],
44774477
["Total Size", util.naturalsize(self.nbytes)],
44784478
]
4479-
header = ["Table", "Rows", "Size", "Has Metadata"]
4479+
header = ["Table", "Rows", "Size", "Metadata", "Metadata size"]
44804480
table_rows = []
44814481
for name, table in self.tables.table_name_map.items():
44824482
table_rows.append(
44834483
[
44844484
name.capitalize(),
44854485
f"{util.format_number(table.num_rows, sep=',')}",
44864486
util.naturalsize(table.nbytes),
4487-
(
4488-
"Yes"
4489-
if hasattr(table, "metadata") and len(table.metadata) > 0
4490-
else "No"
4491-
),
4487+
util.metadata_codec(table),
4488+
util.metadata_size(table),
44924489
]
44934490
)
44944491
return util.unicode_table(ts_rows, title="TreeSequence") + util.unicode_table(

python/tskit/util.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -534,17 +534,29 @@ def html_table(rows, *, header):
534534
"""
535535

536536

537+
def metadata_codec(table):
    """
    Return a short label naming the metadata codec of ``table``.

    :param table: The object to inspect. Only its ``metadata_schema``
        attribute (if present) is read.
    :return: ``""`` if ``table`` has no ``metadata_schema`` attribute;
        ``"raw"`` if ``table.metadata_schema.schema`` is ``None``; otherwise
        the value stored under the schema's ``"codec"`` key, or ``"unknown"``
        if the schema has no such key.
    :rtype: str
    """
    # Objects without a schema attribute get an empty cell.
    if not hasattr(table, "metadata_schema"):
        return ""
    schema = table.metadata_schema.schema
    # A schema of None is reported as "raw".
    if schema is None:
        return "raw"
    return schema.get("codec", "unknown")
542+
543+
544+
def metadata_size(table):
    """
    Return a display string giving the size of ``table.metadata`` and its
    share of the table's total size.

    The length of ``table.metadata`` is formatted with ``naturalsize`` and
    followed by that length as a whole-number percentage of ``table.nbytes``
    in parentheses.

    :param table: The object to inspect. Its ``metadata`` attribute (if
        present) and ``nbytes`` attribute are read.
    :return: ``""`` if ``table`` has no ``metadata`` attribute, otherwise the
        formatted size followed by the percentage in parentheses.
    :rtype: str
    """
    if not hasattr(table, "metadata"):
        return ""
    metadata_len = len(table.metadata)
    total = table.nbytes
    # Avoid ZeroDivisionError if the table reports a total size of zero;
    # the share is then shown as 0%.
    frac = metadata_len / total if total > 0 else 0.0
    return f"{naturalsize(metadata_len)} ({frac:.0%})"
549+
550+
537551
def tree_sequence_html(ts):
538552
table_rows = "".join(
539553
f"""
540554
<tr>
541555
<td>{name.capitalize()}</td>
542556
<td>{format_number(table.num_rows)}</td>
543557
<td>{naturalsize(table.nbytes)}</td>
544-
<td style="text-align: center;">
545-
{'✅' if hasattr(table, "metadata") and len(table.metadata) > 0
546-
else ''}
547-
</td>
558+
<td style="text-align: center;">{metadata_codec(table)}</td>
559+
<td>{metadata_size(table)}</td>
548560
</tr>
549561
"""
550562
for name, table in ts.tables.table_name_map.items()
@@ -637,7 +649,8 @@ def tree_sequence_html(ts):
637649
<th style="line-height:21px;">Table</th>
638650
<th>Rows</th>
639651
<th>Size</th>
640-
<th>Has Metadata</th>
652+
<th>Metadata</th>
653+
<th>Metadata size</th>
641654
</tr>
642655
</thead>
643656
<tbody>

0 commit comments

Comments
 (0)