Skip to content

Commit da399b7

Browse files
authored
Fix/check append rep (#277)
* add option that the headers of an append-mode file can have different format * add tests for irregular headers in append mode * hotfix: fix bug in append start read implementation * lint * lint II
1 parent 1002dd0 commit da399b7

File tree

5 files changed

+2601
-28
lines changed

5 files changed

+2601
-28
lines changed

pyerrors/input/sfcf.py

Lines changed: 68 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from ..obs import Obs
66
from .utils import sort_names, check_idl
77
import itertools
8+
import warnings
89

910

1011
sep = "/"
@@ -603,42 +604,82 @@ def _read_chunk_data(chunk, start_read, T, corr_line, b2b, pattern, im, single):
603604
return data
604605

605606

607+
def _check_append_rep(content, start_list):
    """Check that all chunks of an append-mode file have a compatible layout.

    Every chunk is split at its first line starting with ``[correlator]``:
    the lines before it count as header, that line and everything after it
    count as data.

    Parameters
    ----------
    content : list[str]
        All lines of the file.
    start_list : list[int]
        Indices into ``content`` at which the individual chunks start. A
        chunk ends where the next one starts; the last chunk ends at the end
        of ``content``.

    Returns
    -------
    bool
        True if all headers have the same length, False otherwise (a warning
        is emitted in that case).

    Raises
    ------
    ValueError
        If a chunk contains no line starting with ``[correlator]``.
    Exception
        If the data parts of the chunks differ in length.
    """
    header_len_list = []
    data_len_list = []
    # Pair every chunk start with its end: the next start, or EOF for the last chunk.
    stop_list = list(start_list[1:]) + [len(content)]
    for chunk_num, (start, stop) in enumerate(zip(start_list, stop_list)):
        chunk = content[start:stop]
        header_len = next(
            (linenumber for linenumber, line in enumerate(chunk) if line.startswith("[correlator]")),
            None,
        )
        if header_len is None:
            # Without this guard the header length of the previous chunk would be
            # reused silently (or the name would be unbound for the first chunk).
            raise ValueError(
                f"No line starting with '[correlator]' found in chunk {chunk_num} "
                f"(lines {start + 1} to {stop})"
            )
        header_len_list.append(header_len)
        data_len_list.append(len(chunk) - header_len)

    has_regular_len_heads = True
    if len(set(header_len_list)) > 1:
        warnings.warn("Not all headers have the same length. Data parts do.")
        has_regular_len_heads = False

    if len(set(data_len_list)) > 1:
        raise Exception("Irregularities in file structure found, not all run data are of the same output length")
    return has_regular_len_heads
632+
633+
634+
def _read_chunk_structure(chunk, pattern, b2b):
    """Locate the gauge-name line and the data block of one correlator in a chunk.

    The chunk is scanned line by line. For every line starting with
    ``[correlator]`` the following ``5 + b2b`` lines are joined and searched
    for ``pattern``; the first correlator that matches is used.

    Parameters
    ----------
    chunk : list[str]
        Lines of a single chunk.
    pattern : str
        Regular expression passed to ``re.search`` on the joined lines that
        follow a ``[correlator]`` line.
    b2b : bool or int
        Added to the line offsets below, i.e. a true value shifts them by one.

    Returns
    -------
    gauge_line : int
        Index of the last ``gauge_name`` line seen before the scan stopped.
    corr_line : int
        Index of the matching ``[correlator]`` line.
    start_read : int
        ``corr_line + 7 + b2b``.
    T : int
        ``endline - start_read``, where ``endline`` is the index of the first
        ``"\\n"`` line at or after ``corr_line + 6 + b2b``, or the end of the
        chunk if there is none.

    Raises
    ------
    ValueError
        If no correlator matching ``pattern`` is found.
    Exception
        If no ``gauge_name`` line precedes the matching correlator.
    """
    gauge_line = None
    corr_line = None
    start_read = 0
    for linenumber, line in enumerate(chunk):
        if line.startswith("gauge_name"):
            gauge_line = linenumber
        elif line.startswith("[correlator]"):
            corr_line = linenumber
            found_pat = "".join(chunk[corr_line + 1: corr_line + 6 + b2b])
            if re.search(pattern, found_pat):
                start_read = corr_line + 7 + b2b
                break
    # start_read is at least 7 once a correlator matched, so 0 means "no match".
    if start_read == 0:
        raise ValueError("Did not find pattern\n", pattern)
    if gauge_line is None:
        # Deliberately not a ValueError: the caller re-labels ValueError as
        # "Did not find pattern", which would be misleading here.
        raise Exception("No line starting with 'gauge_name' found before the matching correlator")
    endline = corr_line + 6 + b2b
    # Stop at the end of the chunk as well, so that a data block that is not
    # followed by a blank line (e.g. at the end of the file) does not raise
    # an IndexError.
    while endline < len(chunk) and chunk[endline] != "\n":
        endline += 1
    T = endline - start_read
    return gauge_line, corr_line, start_read, T
654+
655+
606656
def _read_append_rep(filename, pattern, b2b, im, single, idl_func, cfg_func_args):
607657
with open(filename, 'r') as fp:
608658
content = fp.readlines()
609-
data_starts = []
659+
chunk_start_lines = []
610660
for linenumber, line in enumerate(content):
611661
if "[run]" in line:
612-
data_starts.append(linenumber)
613-
if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
614-
raise Exception("Irregularities in file structure found, not all runs have the same output length")
615-
chunk = content[:data_starts[1]]
616-
for linenumber, line in enumerate(chunk):
617-
if line.startswith("gauge_name"):
618-
gauge_line = linenumber
619-
elif line.startswith("[correlator]"):
620-
corr_line = linenumber
621-
found_pat = ""
622-
for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
623-
found_pat += li
624-
if re.search(pattern, found_pat):
625-
start_read = corr_line + 7 + b2b
626-
break
627-
else:
628-
raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename)
629-
endline = corr_line + 6 + b2b
630-
while not chunk[endline] == "\n":
631-
endline += 1
632-
T = endline - start_read
633-
634-
# all other chunks should follow the same structure
662+
chunk_start_lines.append(linenumber)
663+
has_regular_len_heads = _check_append_rep(content, chunk_start_lines)
664+
if has_regular_len_heads:
665+
chunk = content[:chunk_start_lines[1]]
666+
try:
667+
gauge_line, corr_line, start_read, T = _read_chunk_structure(chunk, pattern, b2b)
668+
except ValueError:
669+
raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename, "lines", 1, "to", chunk_start_lines[1] + 1)
670+
# if has_regular_len_heads is true, all other chunks should follow the same structure
635671
rep_idl = []
636672
rep_data = []
637673

638-
for cnfg in range(len(data_starts)):
639-
start = data_starts[cnfg]
640-
stop = start + data_starts[1]
674+
for chunk_num in range(len(chunk_start_lines)):
675+
start = chunk_start_lines[chunk_num]
676+
if chunk_num == len(chunk_start_lines) - 1:
677+
stop = len(content)
678+
else:
679+
stop = chunk_start_lines[chunk_num + 1]
641680
chunk = content[start:stop]
681+
if not has_regular_len_heads:
682+
gauge_line, corr_line, start_read, T = _read_chunk_structure(chunk, pattern, b2b)
642683
try:
643684
idl = idl_func(chunk[gauge_line], *cfg_func_args)
644685
except Exception:

0 commit comments

Comments
 (0)