Skip to content

Commit 36c12bd

Browse files
authored
Add option to split output according to predicates.
This commit adds a `--split-with-predicates` flag, which changes the behavior of the `--split` flag. By default, the output is split along a subject prefix and an object prefix, e.g. `MONDO-to-NCIT.sssom.tsv`. When the `--split-with-predicates` flag is passed, the name of each split output file also includes the CURIE of the predicate, e.g. `MONDO-skos_exactMatch-NCIT.sssom.tsv`. (Note that the colon in the CURIE is replaced by an underscore, since colons cannot appear in filenames on Windows.) This (almost) matches the behavior of sssom-py: <https://github.com/mapping-commons/sssom-py/blob/ac0f769386d35e2b1c961cac59221c816143bf07/src/sssom/parsers.py#L1033-L1035>. The difference is that sssom-py only includes the local name of the predicate, while this pull request includes the full CURIE.
1 parent 68e070b commit 36c12bd

File tree

1 file changed

+20
-3
lines changed

1 file changed

+20
-3
lines changed

cli/src/main/java/org/incenp/obofoundry/sssom/cli/SimpleCLI.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,11 @@ private static class OutputOptions {
198198
description = "Split the set along subject and object prefix names and write the split sets in the specified directory.")
199199
String splitDirectory;
200200

201+
@Option(names = "--split-with-predicates",
202+
description = "When splitting, include the predicate CURIE in the split identifier.")
203+
boolean splitWithPredicates;
204+
205+
201206
@Option(names = { "-c", "--force-cardinality" },
202207
hidden = true,
203208
description = "Include mapping cardinality values.")
@@ -646,7 +651,7 @@ private void writeOutput(MappingSet set) {
646651
}
647652

648653
if ( outputOpts.splitDirectory != null ) {
649-
writeSplitSet(set, outputOpts.splitDirectory);
654+
writeSplitSet(set, outputOpts.splitDirectory, outputOpts.splitWithPredicates);
650655
return; // Skip writing the full set when writing splits
651656
}
652657
boolean stdout = outputOpts.file.equals("-");
@@ -660,7 +665,7 @@ private void writeOutput(MappingSet set) {
660665
}
661666
}
662667

663-
private void writeSplitSet(MappingSet ms, String directory) {
668+
private void writeSplitSet(MappingSet ms, String directory, boolean splitWithPredicates) {
664669
File dir = new File(directory);
665670
if ( !dir.isDirectory() && !dir.mkdirs() ) {
666671
helper.error("cannot create directory %s", directory);
@@ -677,7 +682,19 @@ private void writeSplitSet(MappingSet ms, String directory) {
677682
String subjectPrefixName = pm.getPrefixName(mapping.getSubjectId());
678683
String objectPrefixName = pm.getPrefixName(mapping.getObjectId());
679684
if ( subjectPrefixName != null && objectPrefixName != null ) {
680-
String splitId = subjectPrefixName + "-to-" + objectPrefixName;
685+
String splitId;
686+
if (splitWithPredicates) {
687+
String predicatePrefixName = pm.getPrefixName(mapping.getPredicateId());
688+
if (predicatePrefixName != null) {
689+
splitId = subjectPrefixName + "-" + pm.shortenIdentifier(mapping.getPredicateId()) + "-" + objectPrefixName;
690+
splitId = splitId.replace(":", "_");
691+
} else {
692+
splitId = subjectPrefixName + "-to-" + objectPrefixName;
693+
}
694+
} else {
695+
splitId = subjectPrefixName + "-to-" + objectPrefixName;
696+
}
697+
681698
mappingsBySplit.computeIfAbsent(splitId, k -> new ArrayList<Mapping>()).add(mapping);
682699
}
683700
}

0 commit comments

Comments
 (0)