diff --git a/README.md b/README.md index 6386389..1ad45df 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Life cycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![downloads](https://img.shields.io/pypi/dm/totolo.svg)](https://pypistats.org/packages/totolo) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) - + # totolo A Python package for working with data from the Theme Ontology [theming repository](https://github.com/theme-ontology/theming/). @@ -16,11 +16,13 @@ Or clone this repository and copy the `totolo` directory wherever you need it. N ```mermaid erDiagram - "ontology" ||--o{ "story" : contains - "ontology" ||--o{ "theme" : contains - "theme" ||--|{ "field" : contains - "story" ||--|{ "field" : contains - "story" ||--|{ "(weight, keyword)" : contains + direction LR + "ontology" ||--o{ "story" : "o[...]" + "ontology" ||--o{ "theme" : "o[...]" + "theme" ||--|{ "field" : ".get(...)" + "story" ||--|{ "field" : ".get(...)" + "story" ||--|{ "(weight, keyword)" : ".iter_theme_entries()" + "(weight, keyword)" ||--|| "theme" : "(references)" ``` ## totolo - Overview @@ -33,9 +35,9 @@ Inline documentation: [keyword](https://github.com/theme-ontology/python-totolo/blob/main/totolo/keyword.py). Create an ontology object by, for example, fetching a named version remotely. -On the ontology object access stories or themes using their unique name and bracket notation. +On the ontology object access stories or themes using their unique name and bracket notation. Access basic information on the theme or story by accessing named fields using the get-method. -Named fields are defined with the *sa* annotation in the story and theme source code linked above at the top of the respective class. +Named fields are defined with the *"sa"* annotation in the story and theme source code linked above at the top of the respective class. 
Access minor/major/choice theme entries on a story by iterating over them. ```python @@ -53,10 +55,10 @@ The above linked source code for them is intended to be readable. For any story, theme, field or keyword objects you can obtain a text representation that is the same as it would be if the ontology was written to file. ```python -text_s = story.text_canonical(); story.print() -text_t = theme.text_canonical(); theme.print() -text_f = field.str(); print(field) -text_kw = keyword.str(); print(keyword) +_ = story.text(); story.print() +_ = theme.text(); theme.print() +_ = field.str(); print(field) +_ = keyword.str(); print(keyword) ``` Although `totolo` can be used to programmatically edit the structure of the ontology, the documentation is @@ -88,7 +90,7 @@ foremost intended for those who read the ontology to analyse it in python or fee ... print(f"{weight:<15} {theme.name}") ``` -``` +``` Choice Themes betrayal Choice Themes the lust for power (...) @@ -166,4 +168,3 @@ board](https://github.com/theme-ontology/python-totolo/discussions/). 
###### Files and Code Test Coverage [![codecov](https://codecov.io/gh/theme-ontology/python-totolo/branch/main/graphs/icicle.svg?token=1Z39E9IE2W)](https://codecov.io/gh/theme-ontology/python-totolo) - diff --git a/examples/basics.py b/examples/basics.py index a21c90f..3f507ec 100644 --- a/examples/basics.py +++ b/examples/basics.py @@ -2,7 +2,7 @@ import os.path -def example(): +def example_read_write(): #: get the latest main branch version of the ontology ontology = totolo.remote() print(ontology) @@ -19,6 +19,10 @@ def example(): # <2945 themes, 4475 stories> print("---") + +def example_iterate(): + ontology = totolo.remote() + #: go over all the themes for theme in ontology.themes(): if "romantic love" in theme.name: @@ -44,5 +48,21 @@ def example(): print("---") +def example_cross_reference(): + ontology = totolo.remote() + story = ontology.story["play: Macbeth (1606)"] + + #: fetch any stories that have several major/choice themes in common + weight = ["choice", "major"] + theme_set = story.themes(weight) + story_set = {st for st in ontology.stories() if len(st.themes(weight) & theme_set) >= 5} + print(sorted(st.name for st in story_set)) + + # ['movie: Ran (1985)', 'movie: Star Wars: Episode III - Revenge of the Sith (2005)', (...) 
+ print("---") + + if __name__ == "__main__": - example() + example_read_write() + example_iterate() + example_cross_reference() diff --git a/release.sh b/scripts/release.sh similarity index 100% rename from release.sh rename to scripts/release.sh diff --git a/scripts/test.sh b/scripts/test.sh new file mode 100755 index 0000000..7cf0038 --- /dev/null +++ b/scripts/test.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -euo pipefail +cd `git rev-parse --show-toplevel` +set -o xtrace +pytest --cov-report=xml --cov-fail-under=100 --cov=totolo tests/ +pylint totolo diff --git a/tests/test_toset.py b/tests/test_toset.py index d2fab0c..2c550f0 100644 --- a/tests/test_toset.py +++ b/tests/test_toset.py @@ -1,5 +1,5 @@ import totolo -from totolo.impl.to_dict import TOSet +from totolo.impl.to_containers import TOSet class TestTOSet: diff --git a/tests/test_totolo.py b/tests/test_totolo.py index b53cd5b..043ae52 100644 --- a/tests/test_totolo.py +++ b/tests/test_totolo.py @@ -208,6 +208,32 @@ def test_sampling(self): assert isinstance(story, TOStory) assert isinstance(theme, TOTheme) + def test_story_themes(self): + ontology = totolo.files("tests/data/sample-2023.07.23") + story = ontology.story["movie: Frankenstein (1931)"] + minor = { + 'body snatching', 'coping with the death of someone', 'electricity', + "pride in one's own creation", 'scientist occupation', 'unrequited love', + } + major = { + 'engaged couple', 'hubris', 'mad scientist stereotype', 'maker and monster', + 'obsession', 'playing God with nature', 'pride goes before a fall', 'undead being', + 'what it is like to be different', + } + assert {s.name for s in story.themes("minor")} == minor + assert {s.name for s in story.themes("Minor Themes")} == minor + assert {s.name for s in story.themes(["major", "minor"])} == minor | major + assert {s.name for s in story.themes({"major", "minor"})} == minor | major + assert {s.name for s in story.themes()} == minor | major + + def test_story_to_text(self): + ontology = 
totolo.files("tests/data/sample-2023.07.23") + assert isinstance(ontology.story["movie: Frankenstein (1931)"].text(), str) + + def test_theme_to_text(self): + ontology = totolo.files("tests/data/sample-2023.07.23") + assert isinstance(ontology.theme["romantic love"].text(), str) + def test_story_to_theme(self): ontology = totolo.files("tests/data/sample-2023.07.23") story = ontology.story["movie: Frankenstein (1931)"] diff --git a/totolo/__init__.py b/totolo/__init__.py index 55764a3..28e742d 100644 --- a/totolo/__init__.py +++ b/totolo/__init__.py @@ -5,7 +5,7 @@ remote = TORemote() -__version__ = "2.0.0" +__version__ = "2.1.0" __ALL__ = [ empty, files, diff --git a/totolo/field.py b/totolo/field.py index 83089c5..957ae8e 100644 --- a/totolo/field.py +++ b/totolo/field.py @@ -23,9 +23,15 @@ def text_canonical(self): return "\n".join(parts) def text_original(self): + """ + Return the original text read from files. + """ return "\n".join(self.source) def delete_kw(self, keyword): + """ + If this field is a KW field (fieldtype=='kwlist'), delete a keyword in it. + """ assert self.mutable().setup().fieldtype == "kwlist" todelete = set() for idx, part in enumerate(self.parts): @@ -36,6 +42,9 @@ def delete_kw(self, keyword): def update_kw(self, match_keyword, keyword=None, motivation=None, capacity=None, notes=None): + """ + If this field is a KW field (fieldtype=='kwlist'), modify a keyword in it. + """ assert self.mutable().setup().fieldtype == "kwlist" for part in self.parts: if part.keyword == match_keyword: @@ -49,6 +58,9 @@ def update_kw(self, match_keyword, keyword=None, part.notes = notes def insert_kw(self, idx=None, keyword="", motivation="", capacity="", notes=""): + """ + If this field is a KW field (fieldtype=='kwlist'), add a keyword to it. 
+ """ assert self.mutable().setup().fieldtype == "kwlist" if idx is None: idx = len(self.parts) @@ -63,6 +75,10 @@ def insert_kw(self, idx=None, keyword="", motivation="", capacity="", notes=""): ) def find_kw(self, match_keyword): + """ + If this field is a KW field (fieldtype=='kwlist'), find a "Keyword" object by matching + the keyword on it. For example, find the theme "love" used in choice themes on story. + """ for part in self.parts: if part.keyword == match_keyword: return part diff --git a/totolo/impl/to_base.py b/totolo/impl/to_base.py index 3d9c9ed..27eacc1 100644 --- a/totolo/impl/to_base.py +++ b/totolo/impl/to_base.py @@ -7,7 +7,7 @@ from ..story import TOStory from ..theme import TOTheme from .to_object import TOObject, a -from .to_dict import TODict +from .to_containers import TODict class TOBase(TOObject): diff --git a/totolo/impl/to_dict.py b/totolo/impl/to_containers.py similarity index 93% rename from totolo/impl/to_dict.py rename to totolo/impl/to_containers.py index 4574120..44e1391 100644 --- a/totolo/impl/to_dict.py +++ b/totolo/impl/to_containers.py @@ -1,6 +1,4 @@ from .to_entry import TOEntry -from ..theme import TOTheme -from ..story import TOStory class TOSet(set): @@ -23,6 +21,8 @@ def dataframe( motivation=False, descriptions=False, ): + from ..theme import TOTheme # pylint: disable=cyclic-import + from ..story import TOStory # pylint: disable=cyclic-import subset_stories = [x for x in self if isinstance(x, TOStory)] subset_themes = [x for x in self if isinstance(x, TOTheme)] for obj in self: diff --git a/totolo/ontology.py b/totolo/ontology.py index c43f468..ede64f3 100644 --- a/totolo/ontology.py +++ b/totolo/ontology.py @@ -5,32 +5,47 @@ class ThemeOntology(TOBase): """ - Common API for ThemeOntology. + Common API for the Theme Ontology. + These methods have the highest degree of support. See TOBase for more methods. + Use brackets like `ontology[name]` to access uniquely named stories or themes + in the ontology. 
""" def stories(self): - """Iterate over the TOStory objects contained.""" + """ + Iterate over the TOStory objects contained. + """ yield from self.story.values() def themes(self): - """Iterate over the TOTheme objects contained.""" + """ + Iterate over the TOTheme objects contained. + """ yield from self.theme.values() def astory(self): - """Pick a TOStory object uniformely at random.""" + """ + Pick a TOStory object uniformly at random. + """ return random.sample(list(self.story.values()), 1)[0] def atheme(self): - """Pick a TOTheme object uniformely at random.""" + """ + Pick a TOTheme object uniformly at random. + """ return random.sample(list(self.theme.values()), 1)[0] def to_dict(self): - """Present the ontology as a dictionary, suitable for json output.""" + """ + Present the ontology as a dictionary, suitable for json output. + """ return self._impl.to_dict(self) def dataframe(self, subset_stories=(), subset_themes=(), implied_themes=False, motivation=False, descriptions=False, ): - """Present some or all of the ontology as a pandas DataFrame, if pandas is installed.""" + """ + Present some or all of the ontology as a pandas DataFrame, if pandas is installed. + """ import pandas as pd headers, data = self._impl.dataframe_records( subset_stories, subset_themes, implied_themes, motivation, descriptions, @@ -38,11 +53,15 @@ def dataframe(self, subset_stories=(), subset_themes=(), implied_themes=False, return pd.DataFrame(data, columns=headers) def print_warnings(self): - """Do basic validation and print warnings to stdout.""" + """ + Do basic validation and print warnings to stdout. + """ for msg in self.validate(): print(msg) return self def write_clean(self): - """Write ontology back to its source in a canonical format.""" + """ + Write ontology back to its source in a canonical format. 
+ """ self.write(cleaned=True) diff --git a/totolo/story.py b/totolo/story.py index b5e8bc6..1e1d608 100644 --- a/totolo/story.py +++ b/totolo/story.py @@ -1,11 +1,16 @@ import html import re -from .impl.to_object import sa +from .impl.to_containers import TOSet from .impl.to_entry import TOEntry +from .impl.to_object import sa class TOStory(TOEntry): + """ + A story in the ontology. The attributes defined with "sa" denote named fields that + can be accessed using, e.g., `story.get("Title")`. + """ Title = sa("text", required=True) Date = sa("date", required=True) Description = sa("text") @@ -55,6 +60,12 @@ def title(self) -> str: """ return self.get("Title").text_canonical_contents().strip() + def text(self): + """ + A nicely formatted text representation of the story. + """ + return self.text_canonical() + def ancestors(self) -> 'Iterable[TOStory]': """ Return a TOCollection set with all stories containing this story as a component. @@ -90,6 +101,22 @@ def iter_themes(self) -> 'Iterable[tuple(str, TOTheme)]': theme = ontology.theme[part.keyword] yield weight, theme + def themes(self, weight=None): + """ + Return a TOSet of this story's themes, filtered by weight. + :param weight: "choice", "major", "minor", or an iterable of these. If None, admit all. + """ + weight = weight or [] + if isinstance(weight, str): + weight = [weight] + def shorten(w): + return w.split(" ", 1)[0].lower() + weight = {shorten(w) for w in weight} + return TOSet( + t for w, t in self.iter_themes() + if not weight or shorten(w) in weight + ) + def verbose_description(self) -> str: """ A lengthy text description of the story. diff --git a/totolo/theme.py b/totolo/theme.py index 415b995..ee674c5 100644 --- a/totolo/theme.py +++ b/totolo/theme.py @@ -5,6 +5,10 @@ class TOTheme(TOEntry): + """ + A theme in the ontology. The attributes defined with "sa" denote named fields that + can be accessed using, e.g., `theme.get("Description")`. 
+ """ Description = sa("text", required=True) Parents = sa("list") Notes = sa("text") @@ -12,6 +16,12 @@ class TOTheme(TOEntry): References = sa("list") Aliases = sa("list") + def text(self): + """ + A nicely formatted text representation of the theme. + """ + return self.text_canonical() + def ancestors(self) -> 'Iterable[TOTheme]': """ Return a TOCollection set with all themes that contain this story beneeth it in