/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package software.amazon.neptune.csv2rdf;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.rio.UnsupportedRDFormatException;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import software.amazon.neptune.csv2rdf.NeptunePropertyGraphElement.NeptuneCsvSingleValuedUserDefinedProperty;
import software.amazon.neptune.csv2rdf.NeptunePropertyGraphElement.NeptuneCsvUserDefinedProperty;
import software.amazon.neptune.csv2rdf.NeptunePropertyGraphElement.NeptunePropertyGraphEdge;
import software.amazon.neptune.csv2rdf.NeptunePropertyGraphElement.NeptunePropertyGraphVertex;
import software.amazon.neptune.csv2rdf.PropertyGraph2RdfMapping.PropertyGraphEdge2RdfMapping;
import software.amazon.neptune.csv2rdf.PropertyGraph2RdfMapping.PropertyGraphVertex2RdfMapping;
/**
*
* This class maps property graph vertices and edges to RDF as specified in
* {@link PropertyGraph2RdfMapping}. RDF quads are used to represent edges with
* properties.
*
* The mapping can be defined in the configuration file.
*
* Simple Example
*
* Simplified configuration values:
*
* <pre>
* mapper.mapping.typeNamespace=type:
* mapper.mapping.vertexNamespace=vertex:
* mapper.mapping.edgeNamespace=edge:
* mapper.mapping.edgeContextNamespace=econtext:
* mapper.mapping.vertexPropertyNamespace=vproperty:
* mapper.mapping.edgePropertyNamespace=eproperty:
* mapper.mapping.defaultNamedGraph=dng:/
* mapper.mapping.defaultType=dt:/
* mapper.mapping.defaultProperty=dp:/
* </pre>
*
* Vertices:
*
* <pre>
* ~id,~label,name,code,country
* 1,city,Seattle,S,USA
* 2,city,Vancouver,V,CA
* </pre>
*
* Edges:
*
* <pre>
* ~id,~label,~from,~to,distance,type
* a,route,1,2,166,highway
* </pre>
*
* RDF statements:
*
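* <pre>
* {@code
* <vertex:1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <type:City> <dng:/> .
* <vertex:1> <vproperty:name> "Seattle" <dng:/> .
* <vertex:1> <vproperty:code> "S" <dng:/> .
* <vertex:1> <vproperty:country> "USA" <dng:/> .
* <vertex:2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <type:City> <dng:/> .
* <vertex:2> <vproperty:name> "Vancouver" <dng:/> .
* <vertex:2> <vproperty:code> "V" <dng:/> .
* <vertex:2> <vproperty:country> "CA" <dng:/> .
*
* <vertex:1> <edge:route> <vertex:2> <econtext:a> .
* <econtext:a> <eproperty:distance> "166" <dng:/> .
* <econtext:a> <eproperty:type> "highway" <dng:/> .
* }
* </pre>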
*/
@Slf4j
@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE)
public class PropertyGraph2RdfMapper {

    /**
     * When {@code true}, a property that is selected to create RDFS labels also
     * produces the regular property statements. When {@code false}, only the RDFS
     * label statements are created for such a property.
     * <p>
     * The properties to create RDFS labels are configured in
     * {@link PropertyGraph2RdfMapping#pgVertexType2PropertyForRdfsLabel}.
     */
    @Getter
    @Setter
    private boolean alwaysAddPropertyStatements = true;

    /**
     * The {@link PropertyGraph2RdfMapping} defines how property graph vertices and
     * edges are mapped to RDF.
     */
    @Getter
    @Setter
    private PropertyGraph2RdfMapping mapping = new PropertyGraph2RdfMapping();

    /**
     * Map a property graph file to RDF.
     * <p>
     * Every element read from the input file is converted to RDF statements, which
     * are written to the output file in the format given by
     * {@link PropertyGraph2RdfConverter#RDF_FORMAT}. Before any statement is
     * written, the prefixes {@code vertex}, {@code edge}, {@code vertexprop} and
     * {@code edgeprop} are announced to the writer with the corresponding
     * namespaces of the configured mapping.
     *
     * @param propertyGraphInFile a property graph file
     * @param rdfOutFile          RDF output file
     * @throws Csv2RdfException if an error occurs during the process
     */
    public void map(final File propertyGraphInFile, File rdfOutFile) {
        log.info("-> Converting input file {}...", propertyGraphInFile.getName());

        try (NeptuneCsvInputParser inputParser = new NeptuneCsvInputParser(propertyGraphInFile);
                FileOutputStream fos = new FileOutputStream(rdfOutFile)) {

            // The writer is only meaningful while the output stream is open, so it
            // is scoped to the try block instead of being declared outside of it.
            RDFWriter rdfWriter = Rio.createWriter(PropertyGraph2RdfConverter.RDF_FORMAT, fos);

            rdfWriter.startRDF();
            rdfWriter.handleNamespace("vertex", mapping.getVertexNamespace());
            rdfWriter.handleNamespace("edge", mapping.getEdgeNamespace());
            rdfWriter.handleNamespace("vertexprop", mapping.getVertexPropertyNamespace());
            rdfWriter.handleNamespace("edgeprop", mapping.getEdgePropertyNamespace());

            while (inputParser.hasNext()) {
                for (Statement statement : mapToStatements(inputParser.next())) {
                    rdfWriter.handleStatement(statement);
                }
            }
            rdfWriter.endRDF();
        } catch (UnsupportedRDFormatException | RDFHandlerException | IOException e) {
            throw new Csv2RdfException("Conversion of file " + propertyGraphInFile.getAbsolutePath() + " failed.", e);
        }
    }

    /**
     * Map a {@link NeptunePropertyGraphElement} to RDF statements according to the
     * configured {@link PropertyGraph2RdfMapper#mapping}, dispatching on whether
     * the element is an edge or a vertex.
     *
     * @param pgElement property graph element, either an edge or a vertex
     * @return list of RDF statements
     * @throws IllegalArgumentException if the element is {@code null} or neither an
     *                                  edge nor a vertex
     */
    private List<Statement> mapToStatements(NeptunePropertyGraphElement pgElement) {
        if (pgElement instanceof NeptunePropertyGraphEdge) {
            return mapToStatements((NeptunePropertyGraphEdge) pgElement);
        }
        if (pgElement instanceof NeptunePropertyGraphVertex) {
            return mapToStatements((NeptunePropertyGraphVertex) pgElement);
        }
        // null fails both instanceof checks; report it with the same exception type
        // instead of tripping over a NullPointerException while building the message
        String elementType = pgElement == null ? "null" : String.valueOf(pgElement.getClass());
        throw new IllegalArgumentException("Property graph element type not recognized: " + elementType);
    }

    /**
     * Map a {@link NeptunePropertyGraphEdge} to RDF statements according to the
     * configured {@link PropertyGraph2RdfMapper#mapping}.
     * <p>
     * The result starts with one relation statement for the edge itself, followed
     * by one literal statement per user-defined edge property.
     *
     * @param edge property graph edge
     * @return list of RDF statements
     */
    public List<Statement> mapToStatements(NeptunePropertyGraphEdge edge) {
        List<Statement> statements = new ArrayList<>();
        PropertyGraphEdge2RdfMapping edgeMapper = mapping.getEdge2RdfMapping();

        // the edge itself
        if (edge.hasLabel()) {
            // edge ID goes into graph position
            statements.add(
                    edgeMapper.createRelationStatement(edge.getFrom(), edge.getLabel(), edge.getTo(), edge.getId()));
        } else {
            statements.add(edgeMapper.createRelationStatement(edge.getFrom(), edge.getTo(), edge.getId()));
        }

        // append edge properties
        for (NeptuneCsvSingleValuedUserDefinedProperty userDefinedProperty : edge.getUserDefinedProperties()) {
            statements.add(edgeMapper.createLiteralStatement(edge.getId(), userDefinedProperty.getName(),
                    userDefinedProperty.getValue(), userDefinedProperty.getDataType()));
        }
        return statements;
    }

    /**
     * Map a {@link NeptunePropertyGraphVertex} to RDF statements according to the
     * configured {@link PropertyGraph2RdfMapper#mapping}.
     * <p>
     * The result contains the type statements of the vertex first, followed by the
     * statements created for its user-defined properties. A property is written
     * as relation statements if the mapping has an RDF resource pattern for it;
     * otherwise it is written as RDFS label statements and/or literal statements,
     * depending on the label configuration and
     * {@link #alwaysAddPropertyStatements}.
     *
     * @param vertex property graph vertex
     * @return list of RDF statements
     */
    public List<Statement> mapToStatements(NeptunePropertyGraphVertex vertex) {
        final List<Statement> statements = new ArrayList<>();
        final PropertyGraphVertex2RdfMapping vertexMapper = mapping.getVertex2RdfMapping();

        // names of the properties whose values become rdfs:label statements
        final Set<String> propertiesForRdfsLabel = new HashSet<>();

        // the vertex itself; for now, we always type (falling back on a default if no
        // type is given)
        if (vertex.getLabels().isEmpty()) {
            statements.add(vertexMapper.createTypeStatement(vertex.getId()));
        } else {
            for (String label : vertex.getLabels()) {
                statements.add(vertexMapper.createTypeStatement(vertex.getId(), label));
                String propertyForRdfsLabel = vertexMapper.getPropertyForRdfsLabel(label);
                if (propertyForRdfsLabel != null) {
                    propertiesForRdfsLabel.add(propertyForRdfsLabel);
                }
            }
        }

        for (NeptuneCsvUserDefinedProperty userDefinedProperty : vertex.getUserDefinedProperties()) {
            String propertyName = userDefinedProperty.getName();

            if (vertexMapper.containsRdfResourcePatternForProperty(propertyName)) {
                // in this case, we do not write a literal statement but a relation
                for (String value : userDefinedProperty.getValues()) {
                    statements.add(vertexMapper.createRelationStatement(vertex.getId(), propertyName, value));
                }
                continue;
            }

            // this property has been marked as the property used as the rdfs:label
            boolean addRdfsLabel = propertiesForRdfsLabel.contains(propertyName);
            if (addRdfsLabel) {
                for (String value : userDefinedProperty.getValues()) {
                    statements.add(vertexMapper.createRdfsLabelStatement(vertex.getId(), value));
                }
            }

            // if either this was not written as rdfs:label or the configuration tells
            // us to write label properties redundantly, we also emit the datatype
            // property statement
            if (!addRdfsLabel || alwaysAddPropertyStatements) {
                for (String value : userDefinedProperty.getValues()) {
                    statements.add(vertexMapper.createLiteralStatement(vertex.getId(), propertyName, value,
                            userDefinedProperty.getDataType()));
                }
            }
        }
        return statements;
    }
}