Skip to content

Commit 63b5b47

Browse files
committed
1466-instantiate-csv-data-as-rdf: added RDF conversion using RML rules and the RML mapper tool
1 parent 17071fa commit 63b5b47

File tree

8 files changed

+111
-13
lines changed

8 files changed

+111
-13
lines changed

stack-clients/src/main/java/com/cmclinnovations/stack/clients/core/EndpointNames.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ public final class EndpointNames {
77
public static final String GRAFANA = "grafana";
88
public static final String GEOSERVER = "geoserver";
99
public static final String RML = "yarrrml-parser";
10+
public static final String RML_JAVA = "rml-mapper";
1011

1112
private EndpointNames() {
1213
}

stack-clients/src/main/java/com/cmclinnovations/stack/clients/core/datasets/RML.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ public boolean usesBlazegraph() {
1616
void loadInternal(Dataset parent) {
1717
Path subdirectory = this.getSubdirectory()
1818
.orElseThrow(() -> new RuntimeException("No 'subdirectory' specified - required for RML data"));
19-
Map<String, byte[]> rmlRules = RmlMapperClient.getInstance().parseYarrrmlToRml(
20-
parent.getDirectory().resolve(subdirectory), parent.getNamespace());
21-
RmlMapperClient.getInstance().parseRmlToRDF(rmlRules);
19+
Path fullSubDirPath = parent.getDirectory().resolve(subdirectory);
20+
RmlMapperClient mapperClient = RmlMapperClient.getInstance();
21+
Map<String, byte[]> rmlRules = mapperClient.parseYarrrmlToRml(fullSubDirPath, parent.getNamespace());
22+
mapperClient.parseRmlToRDF(fullSubDirPath, rmlRules);
2223
}
2324
}

stack-clients/src/main/java/com/cmclinnovations/stack/clients/rml/RmlMapperClient.java

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import java.nio.file.Path;
1010
import java.text.MessageFormat;
1111
import java.util.Collection;
12+
import java.util.List;
1213
import java.util.Map;
1314
import java.util.stream.Collectors;
1415

@@ -26,7 +27,9 @@ public class RmlMapperClient extends ClientWithEndpoint<RmlMapperEndpointConfig>
2627

2728
private static final String CSV_FILE_EXTENSION = "csv";
2829
private static final String YML_FILE_EXTENSION = "yml";
30+
private static final String TTL_FILE_EXTENSION = "ttl";
2931
private static final String YARRRML_PARSER_EXECUTABLE_PATH = "/app/bin/parser.js";
32+
private static final String TEMP_CONTAINER_DATA_DIR_PATH = "/data";
3033

3134
private static RmlMapperClient instance = null;
3235

@@ -57,10 +60,33 @@ public Map<String, byte[]> parseYarrrmlToRml(Path dirPath, String namespace) {
5760
* Parses the RML rules into RDF triples that will be uploaded at the target
5861
* namespace.
5962
*
63+
* @param dirPath  Local directory containing the CSV files that are sent to the mapper container.
6064
* @param rmlRules Input RML rules.
6165
*/
62-
public void parseRmlToRDF(Map<String, byte[]> rmlRules) {
63-
LOGGER.info("Reading the RML rules...");
66+
public void parseRmlToRDF(Path dirPath, Map<String, byte[]> rmlRules) {
67+
LOGGER.info("Uploading the RML rules and csv files into the target container...");
68+
String rmlMapperJavaContainerId = super.getContainerId(EndpointNames.RML_JAVA);
69+
70+
List<String> csvFiles = rmlRules.keySet().stream()
71+
.map(file -> FileUtils.replaceExtension(file, CSV_FILE_EXTENSION))
72+
.collect(Collectors.toList());
73+
super.sendFiles(rmlMapperJavaContainerId, dirPath.toAbsolutePath().toString(), csvFiles,
74+
TEMP_CONTAINER_DATA_DIR_PATH);
75+
super.sendFilesContent(rmlMapperJavaContainerId, rmlRules, TEMP_CONTAINER_DATA_DIR_PATH);
76+
77+
LOGGER.info("Converting and uploading csv data...");
78+
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
79+
ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
80+
rmlRules.keySet().stream().forEach(file -> {
81+
LOGGER.info("Executing RML rules for {}...", file);
82+
String execId = super.createComplexCommand(rmlMapperJavaContainerId, "java", "-jar", "/rmlmapper.jar", "-m",
83+
"/data/" + file, "-s", "turtle")
84+
.withOutputStream(outputStream)
85+
.withErrorStream(errorStream)
86+
.exec();
87+
super.handleErrors(errorStream, execId, LOGGER);
88+
});
89+
super.deleteDirectory(rmlMapperJavaContainerId, TEMP_CONTAINER_DATA_DIR_PATH);
6490
}
6591

6692
/**
@@ -118,14 +144,14 @@ private Map<String, byte[]> genRmlRules(Path dirPath, String namespace) {
118144
}
119145
}));
120146
// Send the files to a new data directory
121-
super.sendFilesContent(containerId, yarrrmlRules, "/data");
147+
super.sendFilesContent(containerId, yarrrmlRules, TEMP_CONTAINER_DATA_DIR_PATH);
122148

123-
// Execute the command and return the RML rules alongside the CSV file name
149+
// Execute the command and return the RML rules alongside the TTL file name
124150
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
125151
ByteArrayOutputStream errorStream = new ByteArrayOutputStream();
126152
Map<String, byte[]> rmlRules = yarrrmlRules.entrySet().stream()
127153
.collect(Collectors.toMap(
128-
entry -> FileUtils.replaceExtension(entry.getKey(), CSV_FILE_EXTENSION),
154+
entry -> FileUtils.replaceExtension(entry.getKey(), TTL_FILE_EXTENSION),
129155
entry -> {
130156
LOGGER.info("Generating RML rules from {}...", entry.getKey());
131157
String execId = super.createComplexCommand(containerId, YARRRML_PARSER_EXECUTABLE_PATH, "-i",
@@ -137,7 +163,7 @@ private Map<String, byte[]> genRmlRules(Path dirPath, String namespace) {
137163
return outputStream.toByteArray();
138164
}));
139165
LOGGER.info("RML rules are generated. Removing any temporary YARRRML files in the container...");
140-
super.deleteDirectory(containerId, "/data");
166+
super.deleteDirectory(containerId, TEMP_CONTAINER_DATA_DIR_PATH);
141167
return rmlRules;
142168
}
143169

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package com.cmclinnovations.stack.clients.rml;
2+
3+
import com.cmclinnovations.stack.clients.core.PasswordEndpointConfig;
4+
import com.fasterxml.jackson.annotation.JsonIgnore;
5+
import com.fasterxml.jackson.annotation.JsonInclude;
6+
import com.fasterxml.jackson.annotation.JsonInclude.Include;
7+
8+
@JsonInclude(Include.NON_NULL)
9+
public class RmlMapperJavaEndpointConfig extends PasswordEndpointConfig {
10+
private final String hostName;
11+
private final String port;
12+
private final String username;
13+
14+
protected RmlMapperJavaEndpointConfig() {
15+
this(null, null, null, null, null);
16+
}
17+
18+
public RmlMapperJavaEndpointConfig(String name, String hostName, String port, String username, String passwordFile) {
19+
super(name, passwordFile);
20+
this.hostName = hostName;
21+
this.port = port;
22+
this.username = username;
23+
}
24+
25+
public String getHostName() {
26+
return hostName;
27+
}
28+
29+
public String getPort() {
30+
return port;
31+
}
32+
33+
public String getUsername() {
34+
return username;
35+
}
36+
37+
@JsonIgnore
38+
public String getServiceUrl() {
39+
return "http://" + hostName + ":" + port + "/rml-mapper";
40+
}
41+
}

stack-clients/src/main/java/com/cmclinnovations/stack/clients/utils/YarrrmlFile.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ private void appendSources(Map<String, Object> output, Path filePath) {
128128

129129
Map<String, Object> sources = new HashMap<>(this.sourcesTemplate);
130130
Map<String, Object> sourceRef = this.castToMapStringObject(sources.get(SOURCE_REF_KEY));
131-
sourceRef.put(ACCESS_KEY, FileUtils.replaceExtension(this.fileName, "csv"));
131+
sourceRef.put(ACCESS_KEY, "/data/" + FileUtils.replaceExtension(this.fileName, "csv"));
132132
output.put(SOURCES_KEY, sources);
133133
}
134134

@@ -170,9 +170,10 @@ private void updateMappings(Map<String, Object> output) {
170170
stringObjectMap.put(TARGETS_KEY, TARGET_REF_KEY);
171171
}
172172

173-
// Transformation of po if necessary to be YARRRML compliant
173+
// Transformation of po if necessary to be YARRRML compliant
174174
// SnakeYAML transforms the YML content for nested lists into - -
175-
// BUT YARRRML parser only accepts -[] as a shortcut and should be updated accordingly
175+
// BUT YARRRML parser only accepts -[] as a shortcut and should be updated
176+
// accordingly
176177
List<Map<String, String>> transformedPo = new ArrayList<>();
177178
List<Object> originalPo = this.castToListObject(mappingValue.get(PRED_OBJ_KEY));
178179
for (Object predObj : originalPo) {
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package com.cmclinnovations.stack.services;
2+
3+
import com.cmclinnovations.stack.services.config.ServiceConfig;
4+
5+
public class RmlMapperJavaService extends ContainerService {
6+
7+
public static final String TYPE = "rml-mapper";
8+
9+
public RmlMapperJavaService(String stackName, ServiceConfig config) {
10+
super(stackName, config);
11+
}
12+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"type": "rml-mapper",
3+
"ServiceSpec": {
4+
"Name": "rml-mapper",
5+
"TaskTemplate": {
6+
"ContainerSpec": {
7+
"Image": "rmlio/rmlmapper-java:v7.3.1",
8+
"Command": [
9+
"tail",
10+
"-f",
11+
"/dev/null"
12+
]
13+
}
14+
}
15+
}
16+
}

stack-clients/src/test/java/com/cmclinnovations/stack/clients/utils/YarrrmlFileTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ private String genExpectedYarrrmlContents(String fileName) throws IOException {
105105
Map<String, Map<String, Object>> sources = new LinkedHashMap<>();
106106
Map<String, Object> sourceRef = new LinkedHashMap<>();
107107
sourceRef.put("referenceFormulation", "csv");
108-
sourceRef.put("access", FileUtils.replaceExtension(fileName, "csv"));
108+
sourceRef.put("access", "/data/" + FileUtils.replaceExtension(fileName, "csv"));
109109
sources.put("source-ref", sourceRef);
110110
yamlData.put("sources", sources);
111111

0 commit comments

Comments
 (0)