Skip to content

Commit 2e3183e

Browse files
authored
Add a validate tabular script
Added a validate tabular script
2 parents e7da665 + 41aba17 commit 2e3183e

File tree

5 files changed

+567
-0
lines changed

5 files changed

+567
-0
lines changed

hed/cli/cli.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,175 @@ def validate_sidecar_cmd(
538538
ctx.exit(result if result is not None else 0)
539539

540540

541+
@validate.command(
    name="tabular",
    epilog="""
This command validates HED in a tabular file (TSV) against a specified HED schema
version. It can optionally include a sidecar file and check for warnings.

\b
Examples:
    # Basic validation of a TSV file
    hedpy validate tabular events.tsv -sv 8.3.0

    # Validate with a sidecar
    hedpy validate tabular events.tsv -s sidecar.json -sv 8.3.0

    # Validate with multiple schemas (base + library)
    hedpy validate tabular events.tsv -s sidecar.json -sv 8.3.0 -sv score_1.1.0

    # Check for warnings as well as errors
    hedpy validate tabular events.tsv -sv 8.4.0 --check-for-warnings

    # Limit reported errors
    hedpy validate tabular events.tsv -sv 8.4.0 -el 5

    # Save validation results to a file
    hedpy validate tabular events.tsv -sv 8.4.0 -o validation_results.txt
""",
)
@click.argument("tabular_file", type=click.Path(exists=True))
# Validation options
@optgroup.group("Validation options")
@optgroup.option(
    "-sv",
    "--schema-version",
    required=True,
    multiple=True,
    metavar="VERSION",
    help="HED schema version(s) to validate against (e.g., '8.4.0'). Can be specified multiple times for multiple schemas (e.g., -sv lang_1.1.0 -sv score_2.1.0)",
)
@optgroup.option(
    "-s",
    "--sidecar",
    type=click.Path(exists=True),
    metavar=METAVAR_FILE,
    help="BIDS JSON sidecar file to use during validation",
)
@optgroup.option(
    "-w",
    "--check-for-warnings",
    is_flag=True,
    help="Check for warnings as well as errors",
)
@optgroup.option(
    "-el",
    "--error-limit",
    type=int,
    metavar=METAVAR_N,
    help="Limit number of errors reported per code (default: No limit)",
)
@optgroup.option(
    "-ef",
    "--errors-by-file",
    is_flag=True,
    help="If using --error-limit, apply the limit per-file rather than globally",
)
# Output options
@optgroup.group("Output options")
@optgroup.option(
    "-f",
    "--format",
    type=click.Choice(["text", "json"]),
    default="text",
    show_default="text",
    help="Output format for validation results (text: human-readable; json: structured format for programmatic use)",
)
@optgroup.option(
    "-o",
    "--output-file",
    type=click.Path(),
    default="",
    metavar=METAVAR_FILE,
    help="Path for output file to hold validation results; if not specified, output to stdout",
)
# Logging options
@optgroup.group("Logging options")
@optgroup.option(
    "-l",
    "--log-level",
    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]),
    default="WARNING",
    show_default="WARNING",
    help="Log level for diagnostic messages",
)
@optgroup.option(
    "-v",
    "--verbose",
    is_flag=True,
    help="Output informational messages (equivalent to --log-level INFO)",
)
@optgroup.option(
    "-lf",
    "--log-file",
    type=click.Path(),
    metavar=METAVAR_FILE,
    help="File path for saving log output; logs still go to stderr unless --log-quiet is also used",
)
@optgroup.option(
    "-lq",
    "--log-quiet",
    is_flag=True,
    help="Suppress log output to stderr; only applicable when --log-file is used (logs go only to file)",
)
@optgroup.option(
    "--no-log",
    is_flag=True,
    help="Disable all logging output",
)
@click.pass_context
def validate_tabular_cmd(
    ctx,
    tabular_file,
    schema_version,
    sidecar,
    check_for_warnings,
    error_limit,
    errors_by_file,
    format,
    output_file,
    log_level,
    log_file,
    log_quiet,
    no_log,
    verbose,
):
    """Validate HED in a tabular file.

    TABULAR_FILE: The path to the tabular file (e.g., TSV) to validate.
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so developer-facing notes live in comments instead.
    #
    # This command is a thin adapter: it rebuilds an argparse-style argument
    # list from the click parameters and delegates to the standalone script's
    # main(), then exits with the status that main() returns.
    # Imported lazily so the script module is only loaded when this command runs.
    from hed.scripts.validate_hed_tabular import main as validate_tabular_main

    # The positional file goes first so the greedy "-sv" option below can
    # never swallow it.
    args = [tabular_file]

    # All schema versions are forwarded after a single "-sv". The script's
    # parser declares "-sv" with nargs="+"; repeating the flag once per
    # version would make each occurrence overwrite the previous one, so only
    # the last schema would be loaded.
    args.append("-sv")
    args.extend(schema_version)

    if sidecar:
        args.extend(["-s", sidecar])
    if check_for_warnings:
        args.append("-w")
    # Compare against None so an explicit limit of 0 is still forwarded.
    if error_limit is not None:
        args.extend(["-el", str(error_limit)])
    if errors_by_file:
        args.append("-ef")
    if format:
        args.extend(["-f", format])
    if output_file:
        args.extend(["-o", output_file])
    if log_level:
        args.extend(["-l", log_level])
    if log_file:
        args.extend(["-lf", log_file])
    if log_quiet:
        args.append("-lq")
    if no_log:
        args.append("--no-log")
    if verbose:
        args.append("-v")

    result = validate_tabular_main(args)
    # A None result is treated as success.
    ctx.exit(result if result is not None else 0)
708+
709+
541710
@schema.command(name="validate")
542711
@click.argument("schema_path", type=click.Path(exists=True), nargs=-1, required=True)
543712
@click.option("--add-all-extensions", is_flag=True, help="Always verify all versions of the same schema are equal")
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/usr/bin/env python
2+
"""
3+
Validates HED in a tabular file (TSV) against a specified schema version.
4+
5+
This script validates HED in a tabular file, optionally with a JSON sidecar,
6+
against a specified HED schema version.
7+
"""
8+
9+
import argparse
10+
import sys
11+
import os
12+
from hed.models import TabularInput, Sidecar
13+
from hed.errors import ErrorHandler
14+
from hed.schema import load_schema_version
15+
from hed.scripts.script_utils import setup_logging, format_validation_results
16+
17+
18+
def get_parser():
    """Create the argument parser for validate_hed_tabular.

    The parser takes one positional argument (the tabular file) and a required
    ``-sv/--schema-version`` option, plus optional sidecar, warning, error
    limiting, output and logging options.

    Returns:
        argparse.ArgumentParser: Configured argument parser.
    """
    parser = argparse.ArgumentParser(
        description="Validate HED in a tabular file against a HED schema", formatter_class=argparse.RawDescriptionHelpFormatter
    )

    # Required arguments
    parser.add_argument("tabular_file", help="Tabular file (TSV) to validate")
    parser.add_argument(
        "-sv",
        "--schema-version",
        required=True,
        nargs="+",
        # "extend" accumulates values across repeated flags, so both
        # "-sv 8.3.0 score_1.1.0" and "-sv 8.3.0 -sv score_1.1.0" give
        # ["8.3.0", "score_1.1.0"]. With the default "store" action a repeated
        # flag would silently discard the versions given earlier.
        action="extend",
        dest="schema_version",
        help="HED schema version(s) to validate against (e.g., '8.4.0' or '8.3.0 score_1.1.0' for multiple schemas); "
        "the option may also be repeated (e.g., -sv 8.3.0 -sv score_1.1.0)",
    )

    # Optional arguments
    parser.add_argument(
        "-s",
        "--sidecar",
        dest="sidecar_file",
        help="Optional BIDS JSON sidecar file to use during validation",
    )
    parser.add_argument(
        "-w",
        "--check-for-warnings",
        action="store_true",
        dest="check_for_warnings",
        help="Check for warnings in addition to errors",
    )

    # Error limiting
    error_group = parser.add_argument_group("Error limiting options")
    error_group.add_argument(
        "-el",
        "--error-limit",
        type=int,
        dest="error_limit",
        default=None,
        help="Limit number of errors reported per code (default: No limit)",
    )
    error_group.add_argument(
        "-ef",
        "--errors-by-file",
        action="store_true",
        dest="errors_by_file",
        help="If using --error-limit, apply the limit per-file rather than globally",
    )

    # Output options
    output_group = parser.add_argument_group("Output options")
    output_group.add_argument(
        "-f",
        "--format",
        choices=["text", "json"],
        default="text",
        help="Output format for validation results (default: %(default)s)",
    )
    output_group.add_argument(
        "-o",
        "--output-file",
        default="",
        dest="output_file",
        help="Output file for validation results; if not specified, output to stdout",
    )

    # Logging options
    logging_group = parser.add_argument_group("Logging options")
    logging_group.add_argument(
        "-l",
        "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="WARNING",
        dest="log_level",
        help="Logging level (default: %(default)s)",
    )
    logging_group.add_argument("-lf", "--log-file", default="", dest="log_file", help="File path for saving log output")
    logging_group.add_argument(
        "-lq", "--log-quiet", action="store_true", dest="log_quiet", help="Suppress log output to stderr when using --log-file"
    )
    logging_group.add_argument("--no-log", action="store_true", dest="no_log", help="Disable all logging output")
    logging_group.add_argument("-v", "--verbose", action="store_true", help="Output informational messages")

    return parser
107+
108+
109+
def main(arg_list=None):
    """Validate HED in a tabular file and report any issues found.

    Parses the command line, loads the requested schema version(s), validates
    the optional sidecar and then the tabular file, and writes the formatted
    issues (or a success message) to the output file or stdout.

    Parameters:
        arg_list (list or None): Command line arguments. Passed straight to
            ``ArgumentParser.parse_args``; with None, argparse reads ``sys.argv``.

    Returns:
        int: 0 when no issues were found; 1 when issues were found or an
            exception was raised while loading or validating.
    """
    import logging

    parser = get_parser()
    args = parser.parse_args(arg_list)

    # Set up logging
    setup_logging(args.log_level, args.log_file, args.log_quiet, args.verbose, args.no_log)

    # A single named logger is used for every message from this script.
    logger = logging.getLogger("validate_hed_tabular")
    effective_level_name = logging.getLevelName(logger.getEffectiveLevel())
    logger.info(
        "Starting HED validation of tabular file with effective log level: %s (requested: %s, verbose=%s)",
        effective_level_name,
        args.log_level,
        "on" if args.verbose else "off",
    )

    try:
        # Load schema: a lone version is passed as a plain string, several as a list.
        schema_versions = args.schema_version[0] if len(args.schema_version) == 1 else args.schema_version
        logger.info("Loading HED schema version(s) %s", schema_versions)
        schema = load_schema_version(schema_versions)

        # Sidecar and tabular validation share one error handler and one issue list.
        sidecar = None
        issues = []
        error_handler = ErrorHandler(check_for_warnings=args.check_for_warnings)

        if args.sidecar_file:
            logger.info("Loading Sidecar file")
            sidecar = Sidecar(args.sidecar_file, name=os.path.basename(args.sidecar_file))
            sidecar_issues = sidecar.validate(schema, name=sidecar.name, error_handler=error_handler)
            issues += sidecar_issues
            if sidecar_issues:
                logger.warning("Found %d issues in sidecar validation", len(sidecar_issues))

        # Parse and validate the tabular input
        logger.info("Loading Tabular file")
        tabular_input = TabularInput(args.tabular_file, sidecar=sidecar, name=os.path.basename(args.tabular_file))

        logger.info("Validating Tabular file")
        tabular_issues = tabular_input.validate(schema, name=tabular_input.name, error_handler=error_handler)
        issues += tabular_issues

        if issues:
            # Format validation errors
            output = format_validation_results(
                issues,
                output_format=args.format,
                title_message="HED validation issues:",
                error_limit=args.error_limit,
                errors_by_file=args.errors_by_file,
            )

            if args.output_file:
                # Explicit UTF-8 so the report does not depend on the platform's
                # default locale encoding.
                with open(args.output_file, "w", encoding="utf-8") as f:
                    f.write(output)
                logger.info("Validation errors written to %s", args.output_file)
            else:
                print(output)

            return 1  # Exit with error code if validation failed

        # Success message
        success_msg = "Tabular file has valid HED!"
        if args.output_file:
            with open(args.output_file, "w", encoding="utf-8") as f:
                f.write(success_msg + "\n")
            logger.info("Validation results written to %s", args.output_file)
        else:
            print(success_msg)

        return 0

    except Exception as e:
        # Top-level boundary of the script: report the failure and turn it
        # into a non-zero exit status instead of an unhandled traceback.
        logger.error("Validation failed: %s", e)
        # If verbose, print stack trace
        if args.verbose:
            import traceback

            traceback.print_exc()
        return 1
200+
201+
202+
# Script entry point: run the validator and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ hedpy = "hed.cli.cli:main"
101101
validate_bids = "hed.scripts.validate_bids:main"
102102
validate_hed_string = "hed.scripts.validate_hed_string:main"
103103
validate_hed_sidecar = "hed.scripts.validate_hed_sidecar:main"
104+
validate_hed_tabular = "hed.scripts.validate_hed_tabular:main"
104105
hed_extract_bids_sidecar = "hed.scripts.hed_extract_bids_sidecar:main"
105106
hed_validate_schemas = "hed.scripts.validate_schemas:main"
106107
hed_update_schemas = "hed.scripts.hed_convert_schema:main"

0 commit comments

Comments
 (0)