From da4fbb5bc69832407186f21a11950df2136ff51d Mon Sep 17 00:00:00 2001 From: odinlake Date: Sun, 14 Sep 2025 13:42:12 +0100 Subject: [PATCH 1/5] add validate util script --- totolo/util/makejson.py | 48 +++++++---------------------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/totolo/util/makejson.py b/totolo/util/makejson.py index 939356a..0873d32 100644 --- a/totolo/util/makejson.py +++ b/totolo/util/makejson.py @@ -1,10 +1,8 @@ import argparse import json -import re -import sys from collections import defaultdict -import totolo +import totolo.lib.argparse THEME_FIELDS_OFFICIAL = { @@ -110,31 +108,16 @@ def main(): Example: "to-makejson v2025.04 -tsc > ontology_v202404.json" """ - version_patt = re.compile("v\\d{4}\\.\\d{2}$") - parser = argparse.ArgumentParser( - description=( - "Output a version of the ontology as json. " - "Use -t -s -c to select themes, stories, and/or collections respectively. " - "If none of these flags are given, all will be included. " - ), - epilog=main.__doc__ - ) - parser.add_argument("source", nargs="*", help= - "Paths to include or version to download. " - "If a single argument matching tag pattern vYYYY.MM is given, " - "it will be interpreted as a version. " - "Otherwise the arguments will be treated as one or more local paths. " - ) - parser.add_argument("-p", "--path", help="Path to the ontology.") + parser = totolo.lib.argparse.parser(( + "Output a version of the ontology as json. " + "Use -t -s -c to select themes, stories, and/or collections respectively. " + "If none of these flags are given, all will be included. " + ), main.__doc__) parser.add_argument("--verbosity", default="official", help= "Which fields to include. " "'official': (default) include fields for official release. " "'all': include all fields. " ) - parser.add_argument( - "-v", "--version", help="Named version to use. If not specified the latest version of the " - "master branch will be used." - ) parser.add_argument("-t", action='store_true', help="Include themes.") parser.add_argument("-s", action='store_true', help="Include stories.") parser.add_argument("-c", action='store_true', help="Include collections.") @@ -144,25 +127,11 @@ def main(): "component entries on the collections instead. " ) args = parser.parse_args() - - if sum(1 for x in [args.path, args.version, args.source] if x) > 1: - sys.stderr.write("Can specify at most one of --path, --version, or positional argument.") + ontology = totolo.lib.argparse.ontology(args) + if not ontology: return - if args.source: - if len(args.source) == 1 and version_patt.match(args.source[0]): - ontology = totolo.remote.version(args.source[0]) - else: - ontology = totolo.files(args.source) - elif args.path: - ontology = totolo.files(args.path) - elif args.version: - ontology = totolo.remote.version(args.version) - else: - ontology = totolo.remote() - if args.reorg: ontology.organize_collections() - if not any([args.t, args.s, args.c]): dd = make_json(ontology, verbosity=args.verbosity) else: @@ -173,7 +142,6 @@ def main(): with_collections=args.c, verbosity=args.verbosity, ) - try: print(json.dumps(dd, indent=4, ensure_ascii=False)) except BrokenPipeError: # pragma: no cover From 80b54d16308c3a052fd43879895d309088cc6a45 Mon Sep 17 00:00:00 2001 From: odinlake Date: Sun, 14 Sep 2025 16:43:02 +0100 Subject: [PATCH 2/5] validate script and lint fixes --- tests/utils/test_validate.py | 77 ++++++++++++++++++++++++++++++++++++ totolo/lib/argparse.py | 56 ++++++++++++++++++++++++++ totolo/util/makejson.py | 4 +- totolo/util/validate.py | 24 +++++++++++ 4 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 tests/utils/test_validate.py create mode 100644 totolo/lib/argparse.py create mode 100644 totolo/util/validate.py diff --git a/tests/utils/test_validate.py b/tests/utils/test_validate.py new file mode 100644 index 0000000..f7cae01 --- /dev/null +++ b/tests/utils/test_validate.py @@ -0,0 +1,77 @@ +import sys +from unittest.mock import patch +import urllib + +import totolo +import totolo.util.validate + +from tests.test_totolo import precache_remote_resources + + +EXPECTED_WARNINGS_20230723 = """ +tests/data/sample-2023.07.23/notes/stories/film/film-scifi-1920s.st.txt: In movie: The Hands of Orlac (1924): Missing '{' in: ['body part transplant ', 'hand', '', ''] +tests/data/sample-2023.07.23/notes/stories/film/film-scifi-1930s.st.txt: In movie: The Walking Dead (1936): Missing '{' in: ['artificial body part ', 'heart', '', ''] +tests/data/sample-2023.07.23/notes/stories/film/film-scifi-1930s.st.txt: In movie: The Man They Could Not Hang (1939): Missing '{' in: ['artificial body part ', 'heart', '', ''] +tests/data/sample-2023.07.23/notes/stories/film/film-scifi-1930s.st.txt: In movie: The Return of Doctor X (1939): Missing '{' in: ['artificial body part ', 'blood', '', ''] +tests/data/sample-2023.07.23/notes/themes/primary.th.txt: artificial body part: unknown field 'Template' +tests/data/sample-2023.07.23/notes/themes/primary.th.txt: historical figure: unknown field 'Template' +movie: Algol: Tragedy of Power (1920): Undefined 'major theme' with name 'the lust for gold' +movie: Woman in the Moon (1929): Undefined 'minor theme' with name 'the lust for gold' +""".strip() + + +def validate1(capsys, expected = None): + out, err = capsys.readouterr() + assert all(line.startswith("::") for line in err.strip().splitlines()) + assert out.strip() == expected or EXPECTED_WARNINGS_20230723 + + +class TestMakeJson: + def test_from_path(self, capsys): + p1 = "tests/data/sample-2023.07.23/notes" + testargs = ["makejson", "--path", p1] + with patch.object(sys, 'argv', testargs): + totolo.util.validate.main() + validate1(capsys) + + def test_from_path_narg(self, capsys): + p1 = "tests/data/sample-2023.07.23/notes" + testargs = ["makejson", p1] + with patch.object(sys, 'argv', testargs): + totolo.util.validate.main() + validate1(capsys) + + def test_bad_usage(self, capsys): + testargs = ["makejson", "--path", "foo", "--version", "foo"] + with patch.object(sys, 'argv', testargs): + totolo.util.validate.main() + out, err = capsys.readouterr() + assert all(x in err for x in ["--path", "--version", "positional"]) + assert not out + + def test_remote_version(self, capsys): + precache_remote_resources() + testargs = ["makejson", "--version", "v2023.06"] + with patch.object(sys, 'argv', testargs): + with open("tests/data/sample-2023.07.23.tar.gz", "rb+") as fh: + with patch.object(urllib.request, 'urlopen', return_value=fh): + totolo.util.validate.main() + validate1(capsys) + + def test_remote_version_narg(self, capsys): + precache_remote_resources() + testargs = ["makejson", "v2023.06"] + with patch.object(sys, 'argv', testargs): + with open("tests/data/sample-2023.07.23.tar.gz", "rb+") as fh: + with patch.object(urllib.request, 'urlopen', return_value=fh): + totolo.util.validate.main() + validate1(capsys) + + def test_remote_head(self, capsys): + precache_remote_resources() + testargs = ["makejson"] + with patch.object(sys, 'argv', testargs): + with open("tests/data/sample-2023.07.23.tar.gz", "rb+") as fh: + with patch.object(urllib.request, 'urlopen', return_value=fh): + totolo.util.validate.main() + validate1(capsys) diff --git a/totolo/lib/argparse.py b/totolo/lib/argparse.py new file mode 100644 index 0000000..e8f3dce --- /dev/null +++ b/totolo/lib/argparse.py @@ -0,0 +1,56 @@ +import argparse +import re +import sys + +import totolo + + +VERSION_PATTERN = re.compile("v\\d{4}\\.\\d{2}$") + + +def get_parser(description, epilog): + parser = argparse.ArgumentParser( + description=description, + epilog=epilog, + ) + parser.add_argument("source", nargs="*", help= + "Paths to include or version to download. " + "If a single argument matching tag pattern vYYYY.MM is given, " + "it will be interpreted as a version. " + "Otherwise the arguments will be treated as one or more local paths. " + ) + parser.add_argument("-p", "--path", help="Path to the ontology.") + parser.add_argument( + "-v", "--version", help="Named version to use. If not specified the latest version of the " + "master branch will be used." + ) + return parser + + +def get_ontology(args, quiet=True): + if sum(1 for x in [args.path, args.version, args.source] if x) > 1: + print("Can specify at most one of --path, --version, or positional argument.", + file=sys.stderr) + return None + if args.source: + if len(args.source) == 1 and VERSION_PATTERN.match(args.source[0]): + if not quiet: + print(f":: loading TO version {args.source[0]}", file=sys.stderr) + ontology = totolo.remote.version(args.source[0]) + else: + if not quiet: + print(f":: loading TO files {args.source}", file=sys.stderr) + ontology = totolo.files(args.source) + elif args.path: + if not quiet: + print(f":: loading TO files {args.path}", file=sys.stderr) + ontology = totolo.files(args.path) + elif args.version: + if not quiet: + print(f":: loading TO version {args.version}", file=sys.stderr) + ontology = totolo.remote.version(args.version) + else: + if not quiet: + print(":: loading TO working HEAD version", file=sys.stderr) + ontology = totolo.remote() + return ontology diff --git a/totolo/util/makejson.py b/totolo/util/makejson.py index 0873d32..637ca04 100644 --- a/totolo/util/makejson.py +++ b/totolo/util/makejson.py @@ -108,7 +108,7 @@ def main(): Example: "to-makejson v2025.04 -tsc > ontology_v202404.json" """ - parser = totolo.lib.argparse.parser(( + parser = totolo.lib.argparse.get_parser(( "Output a version of the ontology as json. " "Use -t -s -c to select themes, stories, and/or collections respectively. " "If none of these flags are given, all will be included. " @@ -127,7 +127,7 @@ def main(): "component entries on the collections instead. " ) args = parser.parse_args() - ontology = totolo.lib.argparse.ontology(args) + ontology = totolo.lib.argparse.get_ontology(args) if not ontology: return if args.reorg: diff --git a/totolo/util/validate.py b/totolo/util/validate.py new file mode 100644 index 0000000..e542424 --- /dev/null +++ b/totolo/util/validate.py @@ -0,0 +1,24 @@ +import totolo.lib.argparse + + +def main(): + """ + This utility is provided as a command line script. + + Example: + "to-validate v2025.04" + """ + parser = totolo.lib.argparse.get_parser( + "Load a version of the ontology and print any warnings about syntax. ", + main.__doc__ + ) + ontology = totolo.lib.argparse.get_ontology(parser.parse_args(), quiet=False) + try: + if ontology: + ontology.print_warnings() + except BrokenPipeError: # pragma: no cover + pass + + +if __name__ == "__main__": + main() From ca0bb11f29bfb77b48a95be91d54476ab0535903 Mon Sep 17 00:00:00 2001 From: odinlake Date: Sun, 14 Sep 2025 17:25:38 +0100 Subject: [PATCH 3/5] fix bug in validate that caused spurious {} warnings --- totolo/impl/to_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/totolo/impl/to_parser.py b/totolo/impl/to_parser.py index 5911fcb..6b77141 100644 --- a/totolo/impl/to_parser.py +++ b/totolo/impl/to_parser.py @@ -110,7 +110,7 @@ def iter_kwitems_strict( state_idx = ramp.index(part) if state_idx > 3: raise AssertionError(f"Unexpected {part} in: {row}") - close_bracket = ramp[state_idx + 3] + close_bracket = ramp[state_idx + 3] if state_idx > 0 else "" else: acc.append(part) From 91579ad549c0675ae4b2fe75b58560bb2fa7bec1 Mon Sep 17 00:00:00 2001 From: odinlake Date: Sun, 14 Sep 2025 17:29:14 +0100 Subject: [PATCH 4/5] add cmd line; bump version --- pyproject.toml | 1 + totolo/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 870aa7a..a51358d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ to-mergefiles = "totolo.util.mergefiles:main" to-mergelist = "totolo.util.mergelist:main" to-makelist = "totolo.util.makelist:main" to-makejson = "totolo.util.makejson:main" +to-validate = "totolo.util.validate:main" [tool.pylint.BASIC] good-names = "a,sa" diff --git a/totolo/__init__.py b/totolo/__init__.py index bbf884f..19f018e 100644 --- a/totolo/__init__.py +++ b/totolo/__init__.py @@ -5,7 +5,7 @@ remote = TORemote() -__version__ = "2.1.1" +__version__ = "2.1.2" __ALL__ = [ empty, files, From 76fd77e83cc8587313943b1d4fe71dd0d708836c Mon Sep 17 00:00:00 2001 From: odinlake Date: Sat, 20 Sep 2025 16:09:04 +0100 Subject: [PATCH 5/5] improve warnings, improve test coverage --- tests/test_entries.py | 18 ++++++++++++++++++ totolo/impl/to_entry.py | 11 ++++++----- totolo/impl/to_parser.py | 7 +++++-- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/tests/test_entries.py b/tests/test_entries.py index 7c97a1a..b8ec3f3 100644 --- a/tests/test_entries.py +++ b/tests/test_entries.py @@ -1,9 +1,11 @@ import pytest from totolo.impl.to_entry import TOEntry +from totolo.impl.to_parser import TOParser from totolo.story import TOStory from totolo.theme import TOTheme + STORY_DATA = { "Title": "foo title", "Description": "foo description" * 200, @@ -73,6 +75,22 @@ def test_oddities(self): with pytest.raises(KeyError): del entry["foo"] + def test_validate_entries(self): + bad_lines = """ +goofy [baz] {{widget}} +{bad_lines} +""".splitlines()) + warnings = list(entry.validate_keywords()) + for bad_line in bad_lines.splitlines(): + assert any(bad_line in x for x in warnings) + class TestTOStory: def test_story_subtype(self): diff --git a/totolo/impl/to_entry.py b/totolo/impl/to_entry.py index 6c3bef4..7e15814 100644 --- a/totolo/impl/to_entry.py +++ b/totolo/impl/to_entry.py @@ -119,11 +119,12 @@ def validate_keywords(self): from .to_parser import TOParser # pylint: disable=cyclic-import for field in self.fields.values(): if field.fieldtype == "kwlist": - data_iter = filter(None, (x.strip() for x in field.source[1:])) - try: - list(TOParser.iter_kwitems_strict(data_iter)) - except AssertionError as exc: - yield f"In {self.name}: {exc.args[0]}" + for line in (x.strip() for x in field.source[1:]): + if line: + try: + list(TOParser.iter_kwitems_strict([line])) + except AssertionError as exc: + yield f"In {self.name}: {exc.args[0]}" def text_canonical(self): lines = [self.name, "=" * len(self.name), ""] diff --git a/totolo/impl/to_parser.py b/totolo/impl/to_parser.py index 6b77141..83a5cc8 100644 --- a/totolo/impl/to_parser.py +++ b/totolo/impl/to_parser.py @@ -105,11 +105,14 @@ def iter_kwitems_strict( state_idx = 0 close_bracket = "" elif close_bracket: - raise AssertionError(f"Missing '{close_bracket}' in: {row}") + raise AssertionError( + f"Bad bracketing, found '{part}' when expecting '{close_bracket}', " + f"in: {line}" + ) else: state_idx = ramp.index(part) if state_idx > 3: - raise AssertionError(f"Unexpected {part} in: {row}") + raise AssertionError(f"Unexpected {part} in: {line}") close_bracket = ramp[state_idx + 3] if state_idx > 0 else "" else: acc.append(part)