Skip to content

Commit 664da6c

Browse files
committed
Lazy-load values and allow side-loading a master CDF to patch non-ISTP-compliant CDF files
Signed-off-by: Alexis Jeandet <[email protected]>
1 parent 149abcb commit 664da6c

File tree

11 files changed

+109
-43
lines changed

11 files changed

+109
-43
lines changed

pyistp/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@
77
from .loader import ISTPLoader as _ISTPLoader
88

99

10-
def load(file=None, buffer=None) -> _ISTPLoader:
11-
return _ISTPLoader(file=file, buffer=buffer)
10+
def load(file=None, buffer=None, master_file=None, master_buffer=None) -> _ISTPLoader:
11+
return _ISTPLoader(file=file, buffer=buffer, master_file=master_file, master_buffer=master_buffer)

pyistp/_impl.py

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
from .drivers import current_driver
1+
from .drivers import current_driver, Driver
22
from .data_variable import DataVariable
33
from .support_data_variable import SupportDataVariable
44
import re
55
import numpy as np
6-
from typing import List
6+
from typing import List, Optional
77
import logging
88

99
DEPEND_REGEX = re.compile("DEPEND_\\d")
@@ -13,13 +13,13 @@
1313
log = logging.getLogger(__name__)
1414

1515

16-
def _get_attributes(cdf: object, var: str):
16+
def _get_attributes(master_cdf: Driver, cdf: Driver, var: str):
1717
attrs = {}
18-
for attr in cdf.variable_attributes(var):
19-
value = cdf.variable_attribute_value(var, attr)
18+
for attr in master_cdf.variable_attributes(var):
19+
value = master_cdf.variable_attribute_value(var, attr)
2020
if attr.endswith("_PTR") or attr[:-1].endswith("_PTR_"):
21-
if cdf.has_variable(value):
22-
value = cdf.values(value, is_metadata_variable=True)
21+
if master_cdf.has_variable(value):
22+
value = master_cdf.values(value, is_metadata_variable=True)
2323
if hasattr(value, 'tolist'):
2424
attrs[attr] = value.tolist()
2525
else:
@@ -33,26 +33,27 @@ def _get_attributes(cdf: object, var: str):
3333
return attrs
3434

3535

36-
def _get_axis(cdf: object, axis_var: str, data_var: str):
36+
def _get_axis(master_cdf: Driver, cdf: Driver, axis_var: str, data_var: str):
3737
if cdf.has_variable(axis_var):
3838
if cdf.is_char(axis_var):
39-
if 'sig_digits' in cdf.variable_attributes(axis_var): # cluster CSA trick :/
39+
if 'sig_digits' in master_cdf.variable_attributes(axis_var): # cluster CSA trick :/
4040
return SupportDataVariable(name=axis_var, values=np.asarray(cdf.values(axis_var), dtype=float),
41-
attributes=_get_attributes(cdf, axis_var))
41+
attributes=_get_attributes(master_cdf, cdf, axis_var))
4242
return SupportDataVariable(name=axis_var, values=cdf.values(axis_var),
43-
attributes=_get_attributes(cdf, axis_var))
43+
attributes=_get_attributes(master_cdf, cdf, axis_var))
4444
else:
4545
log.warning(
4646
f"{ISTP_NOT_COMPLIANT_W}: trying to load {axis_var} as support data for {data_var} but it is absent from the file")
4747
return None
4848

4949

50-
def _get_axes(cdf: object, var: str, data_shape):
51-
attrs = sorted(filter(lambda attr: DEPEND_REGEX.match(attr), cdf.variable_attributes(var)))
52-
unix_time_name = cdf.variable_attribute_value(var, "DEPEND_TIME")
53-
axes = list(map(lambda attr: _get_axis(cdf, cdf.variable_attribute_value(var, attr), var), attrs))
54-
if unix_time_name is not None and unix_time_name in cdf.variables():
55-
unix_time = _get_axis(cdf, unix_time_name, var)
50+
def _get_axes(master_cdf: Driver, cdf: Driver, var: str, data_shape):
51+
attrs = sorted(filter(lambda attr: DEPEND_REGEX.match(attr), master_cdf.variable_attributes(var)))
52+
unix_time_name = master_cdf.variable_attribute_value(var, "DEPEND_TIME")
53+
axes = list(
54+
map(lambda attr: _get_axis(master_cdf, cdf, master_cdf.variable_attribute_value(var, attr), var), attrs))
55+
if unix_time_name is not None and unix_time_name in master_cdf.variables():
56+
unix_time = _get_axis(master_cdf, cdf, unix_time_name, var)
5657
if len(unix_time) == data_shape[0] and len(axes[0].values) != data_shape[0]:
5758
unix_time.values = (unix_time.values * 1e9).astype('<M8[ns]')
5859
axes[0] = unix_time
@@ -68,47 +69,52 @@ def _get_labels(attributes) -> List[str]:
6869
return [attributes['LABLAXIS']]
6970

7071

71-
def _load_data_var(cdf: object, var: str) -> DataVariable or None:
72-
values = cdf.values(var)
73-
axes = _get_axes(cdf, var, values.shape)
74-
attributes = _get_attributes(cdf, var)
72+
def _load_data_var(master_cdf: Driver, cdf: Driver, var: str) -> DataVariable or None:
73+
values = lambda: cdf.values(var)
74+
shape = cdf.shape(var)
75+
axes = _get_axes(master_cdf, cdf, var, shape)
76+
attributes = _get_attributes(master_cdf, cdf, var)
7577
labels = _get_labels(attributes)
7678
if len(axes) == 0:
7779
log.warning(f"{ISTP_NOT_COMPLIANT_W}: {var} was marked as data variable but it has 0 support variable")
7880
return None
79-
if None in axes or axes[0].values.shape[0] != values.shape[0]:
81+
if None in axes or axes[0].values.shape[0] != shape[0]:
8082
return None
8183
return DataVariable(name=var, values=values, attributes=attributes, axes=axes, labels=labels)
8284

8385

8486
class ISTPLoaderImpl:
85-
cdf = None
87+
cdf: Optional[Driver] = None
8688

87-
def __init__(self, file=None, buffer=None):
89+
def __init__(self, file=None, buffer=None, master_file=None, master_buffer=None):
8890
if file is not None:
8991
log.debug(f"Loading {file}")
9092
self.cdf = current_driver(file or buffer)
93+
if master_file or master_buffer:
94+
self.master_cdf = current_driver(master_file or master_buffer)
95+
else:
96+
self.master_cdf = self.cdf
9197
self.data_variables = []
9298
self._update_data_vars_lis()
9399

94100
def attributes(self):
95-
return self.cdf.attributes()
101+
return self.master_cdf.attributes()
96102

97103
def attribute(self, key):
98-
return self.cdf.attribute(key)
104+
return self.master_cdf.attribute(key)
99105

100106
def _update_data_vars_lis(self):
101-
if self.cdf:
107+
if self.master_cdf:
102108
self.data_variables = []
103-
for var in self.cdf.variables():
104-
var_attrs = self.cdf.variable_attributes(var)
105-
var_type = self.cdf.variable_attribute_value(var, 'VAR_TYPE')
106-
param_type = (self.cdf.variable_attribute_value(var,
107-
'PARAMETER_TYPE') or "").lower() # another cluster CSA crap
108-
if (var_type == 'data' or param_type == 'data') and not self.cdf.is_char(var):
109+
for var in self.master_cdf.variables():
110+
var_attrs = self.master_cdf.variable_attributes(var)
111+
var_type = self.master_cdf.variable_attribute_value(var, 'VAR_TYPE')
112+
param_type = (self.master_cdf.variable_attribute_value(var,
113+
'PARAMETER_TYPE') or "").lower() # another cluster CSA crap
114+
if (var_type == 'data' or param_type == 'data') and not self.master_cdf.is_char(var):
109115
self.data_variables.append(var)
110116
if len(self.data_variables) == 0:
111117
log.warning(f"{ISTP_NOT_COMPLIANT_W}: No data variable found, this is suspicious")
112118

113119
def data_variable(self, var_name) -> DataVariable:
114-
return _load_data_var(self.cdf, var_name)
120+
return _load_data_var(self.master_cdf, self.cdf, var_name)

pyistp/data_variable.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,25 @@
1+
from typing import Collection, Callable, Any, Union
2+
13

24
class DataVariable:
3-
__slots__ = ("name", "values", 'attributes', 'axes', 'labels')
5+
__slots__ = ("name", "_values", 'attributes', 'axes', 'labels')
46

5-
def __init__(self, name, values, attributes, axes, labels=None):
7+
def __init__(self, name: str, values: Union[Collection[Any], Callable], attributes, axes, labels=None):
68
self.name = name
7-
self.values = values
9+
self._values = values
810
self.attributes = attributes
911
self.axes = axes or []
1012
self.labels = labels
1113

14+
@property
15+
def values(self):
16+
if callable(self._values):
17+
self._values = self._values()
18+
return self._values
19+
1220
def __len__(self):
1321
return len(self.values)
22+
1423
def __repr__(self):
1524
return f"""DataVariable: {self.name}
1625
Attributes:

pyistp/drivers/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import logging, os
22
import importlib
3+
from typing import Callable, ByteString
4+
from ._driver import Driver
35

46
log = logging.getLogger(__name__)
57

68

7-
def _load_cdf_lib():
9+
def _load_cdf_lib() -> Callable[[str or ByteString], Driver]:
810
available_libs = ["pycdfpp", "spacepy"]
911
try_first_lib = os.environ.get("PYISTP_CDFLIB", "pycdfpp")
1012
available_libs.remove(try_first_lib)

pyistp/drivers/_driver.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from typing import Protocol, runtime_checkable, List, AnyStr, Collection, Any
2+
3+
4+
class Driver(Protocol):
5+
6+
def attributes(self) -> List[AnyStr]:
7+
...
8+
9+
def attribute(self, key: AnyStr):
10+
...
11+
12+
def data_variables(self) -> List[AnyStr]:
13+
...
14+
15+
def data_variable(self, var_name: AnyStr):
16+
...
17+
18+
def variable_attribute_value(self, var: AnyStr, attr: AnyStr) -> Any:
19+
...
20+
21+
def variable_attributes(self, var: AnyStr) -> List[AnyStr]:
22+
...
23+
24+
def values(self, var: AnyStr, is_metadata_variable: bool = False) -> Collection:
25+
...
26+
27+
def shape(self, var: AnyStr) -> Collection[int]:
28+
...
29+
30+
def variables(self) -> List[AnyStr]:
31+
...
32+
33+
def has_variable(self, name: str) -> bool:
34+
...
35+
36+
def is_char(self, var: str) -> bool:
37+
...

pyistp/drivers/pycdfpp.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,6 @@ def values(self, var, is_metadata_variable=False):
5454
if is_metadata_variable and self.is_char(var):
5555
return _drop_first_dim_if_nrv(v.is_nrv, v.values_encoded)
5656
return _drop_first_dim_if_nrv(v.is_nrv, v.values)
57+
58+
def shape(self, var):
59+
return self.cdf[var].shape

pyistp/drivers/spacepy.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def variable_attributes(self, var):
3636
return self.cdf[var].attrs.keys()
3737
return []
3838

39-
def is_char(self,var):
39+
def is_char(self, var):
4040
return self.cdf[var].type() == pycdf.const.CDF_CHAR
4141

4242
def variable_attribute_value(self, var, attr):
@@ -49,3 +49,6 @@ def values(self, var, is_metadata_variable=False):
4949
if v.type() in (pycdf.const.CDF_EPOCH, pycdf.const.CDF_EPOCH16, pycdf.const.CDF_TIME_TT2000):
5050
return np.vectorize(np.datetime64)(v[:])
5151
return v[:]
52+
53+
def shape(self, var):
54+
return self.cdf[var].shape

pyistp/loader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
class ISTPLoader:
66

7-
def __init__(self, file=None, buffer=None):
7+
def __init__(self, file=None, buffer=None, master_file=None, master_buffer=None):
88
from ._impl import ISTPLoaderImpl
9-
self._impl = ISTPLoaderImpl(file=file, buffer=buffer)
9+
self._impl = ISTPLoaderImpl(file=file, buffer=buffer, master_file=master_file, master_buffer=master_buffer)
1010

1111
def attributes(self):
1212
return self._impl.attributes()
98.6 KB
Binary file not shown.
220 KB
Binary file not shown.

0 commit comments

Comments
 (0)