22import json
33import shutil
44import warnings
5+ from importlib .metadata import version
56from pathlib import Path
6- from typing import cast
7+ from typing import Annotated , Any , cast
78
89import awkward as ak
910import numpy as np
1617from more_itertools import mark_ends
1718from natsort import natsorted
1819from numpy .typing import NDArray
20+ from packaging .version import Version
21+ from pydantic import BaseModel , BeforeValidator , PlainSerializer , WithJsonSchema
1922from seqpro .rag import OFFSET_TYPE
2023from tqdm .auto import tqdm
2124
2629from ._utils import splits_sum_le_value
2730
2831
def _coerce_version(raw: object) -> object:
    """Turn a string into a ``Version``; return any other value unchanged."""
    if isinstance(raw, str):
        return Version(raw)
    return raw


def _version_to_str(parsed: Version) -> str:
    """Render a ``Version`` as its plain string form."""
    return str(parsed)


# Field type for a ``packaging`` version: strings are parsed on input, the
# value is emitted as a string on output, and the serialization JSON schema
# advertises it as a plain string.
_VersionField = Annotated[
    Version,
    BeforeValidator(_coerce_version),
    PlainSerializer(_version_to_str, return_type=str),
    WithJsonSchema({"type": "string"}, mode="serialization"),
]


# Validated metadata record. ``arbitrary_types_allowed`` is required because
# ``Version`` is not a type pydantic knows how to handle natively.
# NOTE: documented with comments rather than a class docstring on purpose,
# since pydantic copies a model's docstring into its generated JSON schema.
class Metadata(BaseModel, arbitrary_types_allowed=True):
    samples: list[str]
    contigs: list[str]
    n_regions: int
    ploidy: int | None = None
    max_jitter: int = 0
    # Optional; defaults to None when no version is supplied.
    version: _VersionField | None = None

    @property
    def n_samples(self) -> int:
        """Number of entries in ``samples`` (derived, not stored)."""
        return len(self.samples)
51+
52+
2953def write (
3054 path : str | Path ,
3155 bed : str | Path | pl .DataFrame ,
@@ -77,7 +101,7 @@ def write(
77101
78102 max_mem = parse_memory (max_mem )
79103
80- metadata = {}
104+ metadata : dict [ str , Any ] = {"version" : Version ( version ( "genvarloader" )) }
81105 path = Path (path )
82106 if path .exists () and overwrite :
83107 logger .info ("Found existing GVL store, overwriting." )
@@ -147,7 +171,6 @@ def write(
147171
148172 logger .info (f"Using { len (samples )} samples." )
149173 metadata ["samples" ] = samples
150- metadata ["n_samples" ] = len (samples )
151174 metadata ["n_regions" ] = gvl_bed .height
152175
153176 if variants is not None :
@@ -172,8 +195,9 @@ def write(
172195 for bw in bigwigs :
173196 _write_bigwigs (path , gvl_bed , bw , samples , max_mem )
174197
198+ _metadata = Metadata (** metadata )
175199 with open (path / "metadata.json" , "w" ) as f :
176- json .dump (metadata , f )
200+ json .dump (_metadata . model_dump () , f )
177201
178202 logger .info ("Finished writing." )
179203 warnings .simplefilter ("default" )
0 commit comments