Skip to content

Commit 0d6c3c1

Browse files
rcjacksonRobert JacksonRobert JacksonAdamTheisen
authored
ADD: Function to create dummy QC variable from multiple ancillary QC variables. (#964)
* Revert "MNT: Update codeowners list. (#958)" This reverts commit 876763e. * ADD: Newest files * FIX: Old changes from CODEOWNERS * FIX: Run pre-commit hooks, ruff ruff. * ADD: Baseline image for comparison. * ENH: Updating to use built in functions * ENH: No longer need to check for dummy * ENH: Updating test * ADD: New updated plot * DEL: Renaming file * ENH: Removing sys imports * ENH:" Precommits * ENH: lint changes * ENH: Updates for multiple qc ancillary variables --------- Co-authored-by: Robert Jackson <[email protected]> Co-authored-by: Robert Jackson <[email protected]> Co-authored-by: AdamTheisen <[email protected]>
1 parent b605f22 commit 0d6c3c1

File tree

6 files changed

+163
-24
lines changed

6 files changed

+163
-24
lines changed

act/plotting/timeseriesdisplay.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1763,7 +1763,6 @@ def qc_flag_block_plot(
17631763
)
17641764
if qc_data_field is None:
17651765
raise ValueError(f'No quality control ancillary variable in Dataset for {data_field}')
1766-
17671766
flag_masks = self._ds[dsname][qc_data_field].attrs['flag_masks']
17681767
flag_meanings = self._ds[dsname][qc_data_field].attrs['flag_meanings']
17691768
flag_assessments = self._ds[dsname][qc_data_field].attrs['flag_assessments']

act/qc/qcfilter.py

Lines changed: 130 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ def __init__(self, ds):
2727
"""initialize"""
2828
self._ds = ds
2929

30-
def check_for_ancillary_qc(self, var_name, add_if_missing=True, cleanup=False, flag_type=False):
30+
def check_for_ancillary_qc(
31+
self, var_name, add_if_missing=True, cleanup=False, flag_type=False, ignore_dims=False
32+
):
3133
"""
3234
Method to check if a quality control variable exist in the dataset
3335
and return the quality control variable name.
@@ -42,16 +44,18 @@ def check_for_ancillary_qc(self, var_name, add_if_missing=True, cleanup=False, f
4244
var_name : str
4345
Data variable name.
4446
add_if_missing : boolean
45-
Add quality control variable if missing from teh dataset. Will raise
47+
Add quality control variable if missing from the dataset. Will raise
4648
an exception if the var_name does not exist in Dataset. Set to False
4749
to not raise exception.
4850
cleanup : boolean
4951
Option to run qc.clean.cleanup() method on the dataset
5052
to ensure the dataset was updated from ARM QC to the
5153
correct standardized QC.
52-
flag_type : boolean
54+
flag_type : boolean
5355
Indicating the QC variable uses flag_values instead of
5456
flag_masks.
57+
ignore_dims : boolean
58+
If True, return quality control ancillary variables even when their dimensions do not match the dimensions of the data variable.
5559
5660
Returns
5761
-------
@@ -74,7 +78,7 @@ def check_for_ancillary_qc(self, var_name, add_if_missing=True, cleanup=False, f
7478
print(f'qc_var_name: {qc_var_name}')
7579
7680
"""
77-
qc_var_name = None
81+
qc_var_name = []
7882
try:
7983
ancillary_variables = self._ds[var_name].attrs['ancillary_variables']
8084
var_dims = self._ds[var_name].dims
@@ -83,16 +87,17 @@ def check_for_ancillary_qc(self, var_name, add_if_missing=True, cleanup=False, f
8387
for var in ancillary_variables:
8488
for attr, value in self._ds[var].attrs.items():
8589
if attr == 'standard_name' and 'quality_flag' in value:
86-
if var_dims == self._ds[var].dims:
87-
qc_var_name = var
88-
90+
if ignore_dims is True:
91+
qc_var_name.append(var)
92+
elif var_dims == self._ds[var].dims:
93+
qc_var_name.append(var)
8994
if add_if_missing and qc_var_name is None:
9095
qc_var_name = self._ds.qcfilter.create_qc_variable(var_name, flag_type=flag_type)
9196

9297
except KeyError:
9398
# Since no ancillary_variables exist look for ARM style of QC
9499
# variable name. If it exists use it else create new
95-
# QC varaible.
100+
# QC variable.
96101
if add_if_missing:
97102
try:
98103
self._ds['qc_' + var_name]
@@ -102,6 +107,11 @@ def check_for_ancillary_qc(self, var_name, add_if_missing=True, cleanup=False, f
102107
var_name, flag_type=flag_type
103108
)
104109

110+
if len(qc_var_name) == 1:
111+
qc_var_name = qc_var_name[0]
112+
if len(qc_var_name) == 0:
113+
qc_var_name = None
114+
105115
# Make sure data variable has a variable attribute linking
106116
# data variable to QC variable.
107117
if add_if_missing:
@@ -226,7 +236,7 @@ def update_ancillary_variable(self, var_name, qc_var_name=None):
226236
qc_var_name : str
227237
quality control variable name. If not given will attempt
228238
to get the name from data variable ancillary_variables
229-
attribute.
239+
attribute. Defaults to first ancillary variable.
230240
231241
Examples
232242
--------
@@ -244,14 +254,24 @@ def update_ancillary_variable(self, var_name, qc_var_name=None):
244254
"""
245255
if qc_var_name is None:
246256
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False)
257+
if isinstance(qc_var_name, list):
258+
qc_var_name = qc_var_name[0]
247259

248260
if qc_var_name is None:
249261
return
250262

263+
if isinstance(qc_var_name, str):
264+
qc_var_name_list = [qc_var_name]
265+
else:
266+
qc_var_name_list = qc_var_name
267+
251268
try:
252269
ancillary_variables = self._ds[var_name].attrs['ancillary_variables']
253-
if qc_var_name not in ancillary_variables:
254-
ancillary_variables = ' '.join([ancillary_variables, qc_var_name])
270+
if isinstance(ancillary_variables, list):
271+
ancillary_variables = ' '.join(ancillary_variables)
272+
for qc_var in qc_var_name_list:
273+
if qc_var not in ancillary_variables:
274+
ancillary_variables = ' '.join([ancillary_variables, qc_var])
255275
except KeyError:
256276
ancillary_variables = qc_var_name
257277

@@ -266,6 +286,7 @@ def add_test(
266286
test_assessment='Bad',
267287
flag_value=False,
268288
recycle=False,
289+
qc_var_name=None,
269290
):
270291
"""
271292
Method to add a new test/filter to a quality control variable.
@@ -296,6 +317,8 @@ def add_test(
296317
Option to use number less than next highest test if available. For example
297318
tests 1, 2, 4, 5 are set. Set to true the next test chosen will be 3, else
298319
will be 6.
320+
qc_var_name : str
321+
QC variable to add test to. Defaults to first ancillary QC variable
299322
300323
Returns
301324
-------
@@ -329,8 +352,10 @@ def add_test(
329352
# Ensure assessment is capitalized to be consistent
330353
test_assessment = test_assessment.capitalize()
331354

332-
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name, flag_type=flag_value)
333-
355+
if qc_var_name is None:
356+
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name, flag_type=flag_value)
357+
if isinstance(qc_var_name, list):
358+
qc_var_name = qc_var_name[0]
334359
if test_number is None:
335360
test_number = self._ds.qcfilter.available_bit(qc_var_name, recycle=recycle)
336361

@@ -425,6 +450,8 @@ def remove_test(
425450

426451
if var_name is not None:
427452
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name)
453+
if isinstance(qc_var_name, list):
454+
qc_var_name = qc_var_name[0]
428455

429456
# Determine which index is using the test number
430457
index = None
@@ -492,7 +519,7 @@ def remove_test(
492519
del flag_assessments[index]
493520
self._ds[qc_var_name].attrs['flag_assessments'] = flag_assessments
494521

495-
def set_test(self, var_name, index=None, test_number=None, flag_value=False):
522+
def set_test(self, var_name, index=None, test_number=None, flag_value=False, qc_var_name=None):
496523
"""
497524
Method to set a test/filter in a quality control variable.
498525
@@ -507,6 +534,8 @@ def set_test(self, var_name, index=None, test_number=None, flag_value=False):
507534
Test number to set.
508535
flag_value : boolean
509536
Switch to use flag_values integer quality control.
537+
qc_var_name : str
538+
QC data variable name. If None, will default to first ancillary QC variable.
510539
511540
Examples
512541
--------
@@ -517,7 +546,10 @@ def set_test(self, var_name, index=None, test_number=None, flag_value=False):
517546
518547
"""
519548

520-
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name)
549+
if qc_var_name is None:
550+
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name)
551+
if isinstance(qc_var_name, list):
552+
qc_var_name = qc_var_name[0]
521553

522554
qc_variable = np.array(self._ds[qc_var_name].values)
523555

@@ -556,11 +588,11 @@ def set_test(self, var_name, index=None, test_number=None, flag_value=False):
556588
def unset_test(
557589
self,
558590
var_name=None,
559-
qc_var_name=None,
560591
index=None,
561592
test_number=None,
562593
flag_value=False,
563594
flag_values_reset_value=0,
595+
qc_var_name=None,
564596
):
565597
"""
566598
Method to unset a test/filter from a quality control variable.
@@ -569,8 +601,6 @@ def unset_test(
569601
----------
570602
var_name : str or None
571603
Data variable name.
572-
qc_var_name : str or None
573-
Quality control variable name. Ignored if var_name is set.
574604
index : int or list or numpy array
575605
Index to unset test in quality control array. If want to
576606
unset all values will need to pass in index of all values.
@@ -580,6 +610,9 @@ def unset_test(
580610
Switch to use flag_values integer quality control.
581611
flag_values_reset_value : int
582612
Value to use when resetting a flag_values value to not be set.
613+
qc_var_name : str or None
614+
Quality control variable name. Ignored if var_name is set.
615+
Will default to first ancillary QC variable
583616
584617
Examples
585618
--------
@@ -599,6 +632,8 @@ def unset_test(
599632

600633
if var_name is not None:
601634
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name)
635+
if isinstance(qc_var_name, list):
636+
qc_var_name = qc_var_name[0]
602637

603638
# Get QC variable
604639
qc_variable = self._ds[qc_var_name].values
@@ -647,9 +682,15 @@ def available_bit(self, qc_var_name, recycle=False):
647682
648683
649684
"""
685+
650686
try:
651687
flag_masks = self._ds[qc_var_name].attrs['flag_masks']
652688
flag_value = False
689+
if len(flag_masks) == 0:
690+
if 'flag_values' in self._ds[qc_var_name].attrs:
691+
flag_masks = self._ds[qc_var_name].attrs['flag_values']
692+
if len(flag_masks) > 0:
693+
flag_value = True
653694
except KeyError:
654695
try:
655696
flag_masks = self._ds[qc_var_name].attrs['flag_values']
@@ -665,7 +706,6 @@ def available_bit(self, qc_var_name, recycle=False):
665706
'available_bit(). flag_values and '
666707
'flag_masks not set as expected'
667708
)
668-
669709
if flag_masks == []:
670710
next_bit = 1
671711
else:
@@ -704,6 +744,7 @@ def get_qc_test_mask(
704744
Test number to return array where test is set.
705745
qc_var_name : str or None
706746
Quality control variable name. Ignored if var_name is set.
747+
Defaults to first ancillary QC variable
707748
flag_value : boolean
708749
Switch to use flag_values integer quality control.
709750
return_index : boolean
@@ -762,10 +803,19 @@ def get_qc_test_mask(
762803
'keyword when calling the get_qc_test_mask() method'
763804
)
764805

765-
if var_name is not None:
806+
if var_name is not None and qc_var_name is None:
766807
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name)
808+
if isinstance(qc_var_name, list):
809+
qc_var_name = qc_var_name[0]
767810

768-
qc_variable = self._ds[qc_var_name].values
811+
qc_variable = self._ds[qc_var_name]
812+
if var_name is not None:
813+
# Ensure that the qc_variable matches the data variable shape
814+
if qc_variable.dims != self._ds[var_name].dims:
815+
# Tile the qc_variable to match the data variable shape
816+
qc_variable = qc_variable.broadcast_like(self._ds[var_name])
817+
818+
qc_variable = qc_variable.values
769819
# Ensure the qc_variable data type is integer. This ensures bitwise comparison
770820
# will not cause an error.
771821
if qc_variable.dtype.kind not in np.typecodes['AllInteger']:
@@ -798,6 +848,7 @@ def get_masked_data(
798848
ma_fill_value=None,
799849
return_inverse=False,
800850
return_mask_only=False,
851+
qc_var_name=None,
801852
):
802853
"""
803854
Returns a numpy masked array containing data and mask or
@@ -826,6 +877,8 @@ def get_masked_data(
826877
where failing.
827878
return_mask_only : boolean
828879
Return the boolean mask only as a numpy array.
880+
qc_var_name : str
881+
Variable name for QC data to use. Defaults to first ancillary QC variable.
829882
830883
Returns
831884
-------
@@ -861,7 +914,10 @@ def get_masked_data(
861914
)
862915
863916
"""
864-
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False)
917+
if qc_var_name is None:
918+
qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False)
919+
if isinstance(qc_var_name, list):
920+
qc_var_name = qc_var_name[0]
865921

866922
flag_value = False
867923
flag_values = None
@@ -1033,6 +1089,8 @@ def datafilter(
10331089

10341090
for var_name in variables:
10351091
qc_var_name = self.check_for_ancillary_qc(var_name, add_if_missing=False, cleanup=False)
1092+
if isinstance(qc_var_name, list):
1093+
qc_var_name = qc_var_name[0]
10361094
if qc_var_name is None:
10371095
if verbose:
10381096
if var_name in ['base_time', 'time_offset']:
@@ -1127,6 +1185,56 @@ def datafilter(
11271185
if verbose:
11281186
print(f'Deleting {qc_var_name} from dataset')
11291187

1188+
def merge_qc_variables(self, var_name, qc_var_names=None):
    """
    Merge the tests of several quality control variables into one.

    Every test listed in the ``flag_masks`` attribute of the second and
    later quality control variables is looked up with
    ``get_qc_test_mask()`` and re-added for ``var_name`` with
    ``add_test()``, reusing the matching ``flag_meanings`` and
    ``flag_assessments`` entries. The first name in the list is treated
    as the merge target and is the value returned.

    Parameters
    ----------
    var_name : str
        The data variable name to merge QC for.
    qc_var_names : list of str or None
        List of quality control variable names to merge together.
        If None, the quality control variables named in the data
        variable's ancillary_variables attribute are used, regardless
        of whether their dimensions match the data variable.

    Returns
    -------
    merge_qc_var_name : str
        The name of the quality control variable the other variables were
        merged in to (the first entry of ``qc_var_names``).

    Raises
    ------
    ValueError
        If fewer than two quality control variables are available.

    """
    if qc_var_names is None:
        qc_var_names = self._ds.qcfilter.check_for_ancillary_qc(var_name, ignore_dims=True)

    # check_for_ancillary_qc() hands back None when nothing was found and a
    # bare string when exactly one QC variable was found. Normalize both to
    # a list so the length check below counts variables, not characters.
    if qc_var_names is None:
        qc_var_names = []
    elif isinstance(qc_var_names, str):
        qc_var_names = [qc_var_names]

    if len(qc_var_names) < 2:
        raise ValueError('Data variable must have more than one ancillary variable.')

    merge_qc_var_name = qc_var_names[0]
    for other_qc_name in qc_var_names[1:]:
        qc_var = self._ds[other_qc_name]
        flag_masks = qc_var.attrs['flag_masks']
        for j, flag in enumerate(flag_masks):
            # NOTE(review): the flag_masks entry itself is passed as
            # test_number; confirm get_qc_test_mask() expects a mask value
            # here rather than a 1-based test number.
            flag_index = self._ds.qcfilter.get_qc_test_mask(
                qc_var_name=other_qc_name,
                test_number=flag,
                return_index=True,
            )
            # NOTE(review): no qc_var_name is passed, so the destination is
            # whichever QC variable add_test() resolves for var_name;
            # confirm that this is always merge_qc_var_name.
            self._ds.qcfilter.add_test(
                var_name,
                index=flag_index,
                test_meaning=qc_var.attrs['flag_meanings'][j],
                test_assessment=qc_var.attrs['flag_assessments'][j],
            )

    return merge_qc_var_name
1237+
11301238

11311239
def set_bit(array, bit_number):
11321240
"""

act/tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
'EXAMPLE_SWATS',
6666
'EXAMPLE_AMERIFLUX_BASE',
6767
'EXAMPLE_AMERIFLUX_META',
68+
'EXAMPLE_SMPS',
6869
]
6970
},
7071
)

act/tests/sample_files.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,3 +159,5 @@
159159
EXAMPLE_GML_AEROSOL_NAS = DATASETS.fetch(
160160
'US1200R.20200101000000.20210214053818.nephelometer.aerosol_light_scattering_coefficient.pm10.1y.1h.US06L_TSI_3563_MLO.US06L_scat_coef.lev2.nas'
161161
)
162+
163+
EXAMPLE_SMPS = DATASETS.fetch('houmergedsmpsapsmlM1.c1.20220801.000000.nc')
54.8 KB
Loading

0 commit comments

Comments
 (0)