/* * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://www.apache.org/licenses/LICENSE-2.0 * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package software.amazon.neptune.csv2rdf; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.rio.RDFHandlerException; import org.eclipse.rdf4j.rio.RDFWriter; import org.eclipse.rdf4j.rio.Rio; import org.eclipse.rdf4j.rio.UnsupportedRDFormatException; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import software.amazon.neptune.csv2rdf.NeptunePropertyGraphElement.NeptuneCsvSingleValuedUserDefinedProperty; import software.amazon.neptune.csv2rdf.NeptunePropertyGraphElement.NeptuneCsvUserDefinedProperty; import software.amazon.neptune.csv2rdf.NeptunePropertyGraphElement.NeptunePropertyGraphEdge; import software.amazon.neptune.csv2rdf.NeptunePropertyGraphElement.NeptunePropertyGraphVertex; import software.amazon.neptune.csv2rdf.PropertyGraph2RdfMapping.PropertyGraphEdge2RdfMapping; import software.amazon.neptune.csv2rdf.PropertyGraph2RdfMapping.PropertyGraphVertex2RdfMapping; /** * * This class performs the basic mapping specified in * {@link PropertyGraph2RdfMapping} between property graph vertices and edges * into RDF. 
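 * RDF quads are used to represent edges with properties.
 * <p>
 * The mapping can be defined in the configuration file.
 * <p>
 * <b>Simple Example</b>
 * <p>
 * Simplified configuration values:
 *
 * <pre>
 * mapper.mapping.typeNamespace=type:
 * mapper.mapping.vertexNamespace=vertex:
 * mapper.mapping.edgeNamespace=edge:
 * mapper.mapping.edgeContextNamespace=econtext:
 * mapper.mapping.vertexPropertyNamespace=vproperty:
 * mapper.mapping.edgePropertyNamespace=eproperty:
 * mapper.mapping.defaultNamedGraph=dng:/
 * mapper.mapping.defaultType=dt:/
 * mapper.mapping.defaultProperty=dp:/
 * </pre>
 *
 * Vertices:
 *
 * <pre>
 * ~id,~label,name,code,country
 * 1,city,Seattle,S,USA
 * 2,city,Vancouver,V,CA
 * </pre>
 *
 * Edges:
 *
 * <pre>
 * ~id,~label,~from,~to,distance,type
 * a,route,1,2,166,highway
 * </pre>
 *
 * RDF statements:
 *
 * <pre>
 * {@code
 * <vertex:1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <type:City> <dng:/> .
 * <vertex:1> <vproperty:name> "Seattle" <dng:/> .
 * <vertex:1> <vproperty:code> "S" <dng:/> .
 * <vertex:1> <vproperty:country> "USA" <dng:/> .
 * <vertex:2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <type:City> <dng:/> .
 * <vertex:2> <vproperty:name> "Vancouver" <dng:/> .
 * <vertex:2> <vproperty:code> "V" <dng:/> .
 * <vertex:2> <vproperty:country> "CA" <dng:/> .
 *
 * <vertex:1> <edge:route> <vertex:2> <econtext:a> .
 * <econtext:a> <eproperty:distance> "166" <dng:/> .
 * <econtext:a> <eproperty:type> "highway" <dng:/> .
 * }
 * </pre>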
*/ @Slf4j @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE) public class PropertyGraph2RdfMapper { /** * When {@code true}, a property that is selected to create RDFS labels will * create normal property statements, too. Otherwise only the RDFS label * statements are created.
* The properties to create RDFS labels are configured in * {@link PropertyGraph2RdfMapping#pgVertexType2PropertyForRdfsLabel}. */ @Getter @Setter private boolean alwaysAddPropertyStatements = true; /** * The {@link PropertyGraph2RdfMapping} defines how property graph vertices and * edges are mapped to RDF. */ @Getter @Setter private PropertyGraph2RdfMapping mapping = new PropertyGraph2RdfMapping(); /** * Map a property graph file to RDF * * @param propertyGraphInFile a property graph file * @param rdfOutFile RDF output file * @throws Csv2RdfException if an error occurs during the process */ public void map(final File propertyGraphInFile, File rdfOutFile) { log.info("-> Converting input file {}...", propertyGraphInFile.getName()); RDFWriter rdfWriter = null; try (NeptuneCsvInputParser inputParser = new NeptuneCsvInputParser(propertyGraphInFile); FileOutputStream fos = new FileOutputStream(rdfOutFile)) { rdfWriter = Rio.createWriter(PropertyGraph2RdfConverter.RDF_FORMAT, fos); rdfWriter.startRDF(); rdfWriter.handleNamespace("vertex", mapping.getVertexNamespace()); rdfWriter.handleNamespace("edge", mapping.getEdgeNamespace()); rdfWriter.handleNamespace("vertexprop", mapping.getVertexPropertyNamespace()); rdfWriter.handleNamespace("edgeprop", mapping.getEdgePropertyNamespace()); while (inputParser.hasNext()) { List statements = mapToStatements(inputParser.next()); for (Statement statement : statements) { rdfWriter.handleStatement(statement); } } rdfWriter.endRDF(); } catch (UnsupportedRDFormatException | RDFHandlerException | IOException e) { throw new Csv2RdfException("Conversion of file " + propertyGraphInFile.getAbsolutePath() + " failed.", e); } } /** * * Map a {@link NeptunePropertyGraphElement} to RDF statements according the * configured {@link PropertyGraph2RdfMapper#mapping}. 
* * @param pgElement * @return list of RDF statements */ private List mapToStatements(NeptunePropertyGraphElement pgElement) { if (pgElement instanceof NeptunePropertyGraphEdge) { return mapToStatements((NeptunePropertyGraphEdge) pgElement); } if (pgElement instanceof NeptunePropertyGraphVertex) { return mapToStatements((NeptunePropertyGraphVertex) pgElement); } throw new IllegalArgumentException("Property graph element type not recognized: " + pgElement.getClass()); } /** * * Map a {@link NeptunePropertyGraphEdge} to RDF statements according the * configured {@link PropertyGraph2RdfMapper#mapping}. * * @param edge property graph edge * @return list of RDF statements */ public List mapToStatements(NeptunePropertyGraphEdge edge) { List statements = new ArrayList<>(); PropertyGraphEdge2RdfMapping edgeMapper = mapping.getEdge2RdfMapping(); // the edge itself if (edge.hasLabel()) { // edge ID goes into graph position statements.add( edgeMapper.createRelationStatement(edge.getFrom(), edge.getLabel(), edge.getTo(), edge.getId())); } else { statements.add(edgeMapper.createRelationStatement(edge.getFrom(), edge.getTo(), edge.getId())); } // append edge properties for (NeptuneCsvSingleValuedUserDefinedProperty userDefinedProperty : edge.getUserDefinedProperties()) { statements.add(edgeMapper.createLiteralStatement(edge.getId(), userDefinedProperty.getName(), userDefinedProperty.getValue(), userDefinedProperty.getDataType())); } return statements; } /** * * Map a {@link NeptunePropertyGraphVertex} to RDF statements according the * configured {@link PropertyGraph2RdfMapper#mapping}. 
* * @param vertex property graph vertex * @return list of RDF statements */ public List mapToStatements(NeptunePropertyGraphVertex vertex) { final List statements = new ArrayList<>(); final PropertyGraphVertex2RdfMapping vertexMapper = mapping.getVertex2RdfMapping(); Set propertiesForRdfsLabel = new HashSet<>(); // the vertex itself; for now, we always type (falling back on a default if no // type is given) if (vertex.getLabels().isEmpty()) { statements.add(vertexMapper.createTypeStatement(vertex.getId())); } else { for (String label : vertex.getLabels()) { statements.add(vertexMapper.createTypeStatement(vertex.getId(), label)); String propertyForRdfsLabel = vertexMapper.getPropertyForRdfsLabel(label); if (propertyForRdfsLabel != null) { propertiesForRdfsLabel.add(propertyForRdfsLabel); } } } for (NeptuneCsvUserDefinedProperty userDefinedProperty : vertex.getUserDefinedProperties()) { String propertyName = userDefinedProperty.getName(); if (vertexMapper.containsRdfResourcePatternForProperty(propertyName)) { // in this case, we do not write a literal statement but a relation for (String value : userDefinedProperty.getValues()) { statements.add(vertexMapper.createRelationStatement(vertex.getId(), propertyName, value)); } } else { boolean addRdfsLabel = propertiesForRdfsLabel.contains(propertyName); // this property has been marked as the property used as the rdfs:label if (addRdfsLabel) { for (String value : userDefinedProperty.getValues()) { statements.add(vertexMapper.createRdfsLabelStatement(vertex.getId(), value)); } } // if either this was not written as rdfs:label or the configuration tells us to // write label properties // redundantly, we also emit the datatype property statement if (!addRdfsLabel || alwaysAddPropertyStatements) { for (String value : userDefinedProperty.getValues()) { statements.add(vertexMapper.createLiteralStatement(vertex.getId(), propertyName, value, userDefinedProperty.getDataType())); } } } } return statements; } }