/* * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://www.apache.org/licenses/LICENSE-2.0 * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package software.amazon.neptune.csv2rdf; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; import org.eclipse.rdf4j.rio.RDFFormat; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.JsonMappingException.Reference; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException; import com.fasterxml.jackson.dataformat.javaprop.JavaPropsFactory; import lombok.Getter; import lombok.NonNull; import lombok.Setter; /** * * {@link PropertyGraph2RdfConverter} converts property graph vertices and edges * stored as comma separated values into RDF N-Quads files. The conversion uses * two steps:
* First, an {@link PropertyGraph2RdfMapper} applies the configured * {@link PropertyGraph2RdfMapping} to the property graph data for generating * RDF resources, predicates, literals, triples, and, in case of edge * properties, quads.
* Then, an {@link UriPostTransformer} performs the configured * {@link UriPostTransformation}s on the RDF data. These transformations can be * used to rewrite resource IRIs into more readable ones by replacing parts of * them with property values. * */ @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE) public class PropertyGraph2RdfConverter { public static final RDFFormat RDF_FORMAT = RDFFormat.NQUADS; public static final String DEFAULT_PROPERTY_GRAPH_FILE_EXTENSION = "csv"; public static final String DEFAULT_RDF_FILE_EXTENSION = RDF_FORMAT.getDefaultFileExtension(); public static final String REPLACEMENT_VARIABLE = "{{VALUE}}"; /** * * Extension of the property graph input files. Only files matching the * extension are converted. */ @Getter @Setter private String inputFileExtension = DEFAULT_PROPERTY_GRAPH_FILE_EXTENSION; /** * * Output file suffix, determining the RDF format in which the result is * written. Currently, only N-Quads is supported so this value cannot be * changed. */ @Getter private String outputFileExtension = DEFAULT_RDF_FILE_EXTENSION; /** * * The {@link PropertyGraph2RdfMapper} performs the basic mapping defined in * {@link PropertyGraph2RdfMapping} from property graph vertices and edges into * RDF. */ @Getter @Setter private PropertyGraph2RdfMapper mapper = new PropertyGraph2RdfMapper(); /** * * The {@link UriPostTransformer} runs additional transformations defined in * {@link UriPostTransformation}s on RDF resource IRIs. */ @Getter @Setter private UriPostTransformer transformer = new UriPostTransformer(); /** * * @param config property file, can be {@code null} */ public PropertyGraph2RdfConverter(File config) { if (config != null) { this.load(config); } } /** * * Convert property graph files into RDF files. * * @param inputDirectory directory containing the property graph files, must * exist, available files must be UTF-8 encoded * @param outputDirectory output directory for the RDF files, must exist, output * will be UTF-8 encoded */ public void convert(File inputDirectory, File outputDirectory) { List propertyGraphFiles = this.listPropertyGraphFiles(inputDirectory); List rdfFiles = new ArrayList<>(); for (File propertyGraphFile : propertyGraphFiles) { File rdfFile = getRdfFile(outputDirectory, propertyGraphFile); mapper.map(propertyGraphFile, rdfFile); rdfFiles.add(rdfFile); } transformer.applyTo(rdfFiles, mapper.getMapping().getVertexNamespace()); } /** * * List files in a directory matching * {@link PropertyGraph2RdfConverter#inputFileExtension}. * * @param directory * @return list of matching files */ // visible for testing List listPropertyGraphFiles(File directory) { final File[] files = directory.listFiles((file) -> { return file.isFile() && file.getName().endsWith("." + inputFileExtension); }); if (files == null) { throw new Csv2RdfException("Could not read from input directory: " + directory.getAbsolutePath()); } if (files.length == 0) { throw new Csv2RdfException( "No files with extension " + inputFileExtension + " found at: " + directory.getAbsolutePath()); } return new ArrayList(Arrays.asList(files)); } /** * * @param rdfDirectory the output directory * @param propertyGraphFile * @return a file in the output directory with the name of the property graph * file but RDF extension of * {@link PropertyGraph2RdfConverter#outputFileExtension} */ // visible for testing File getRdfFile(File rdfDirectory, File propertyGraphFile) { String rdfFileName = propertyGraphFile.getName().replaceAll(Pattern.quote(inputFileExtension) + "$", outputFileExtension); return new File(rdfDirectory, rdfFileName); } /** * Load the configuration values and initialize all fields of the * {@link PropertyGraph2RdfConverter} instance and its dependent objects. * * @param config property file */ private void load(@NonNull File config) { try { ObjectMapper mapper = new ObjectMapper(new JavaPropsFactory()); mapper.readerForUpdating(this).readValue(config); } catch (UnrecognizedPropertyException e) { throw new Csv2RdfException( "Loading configuration failed because of unknown property: " + e.getPropertyName(), e); } catch (JsonMappingException e) { throw new Csv2RdfException(getErrorMessage(e), e); } catch (IOException e) { throw new Csv2RdfException("Configuration file not found: " + config.getAbsolutePath(), e); } } /** * Try to find the field and the specific cause where the failure occurred. As * {@link PropertyGraph2RdfMapping#setPgProperty2RdfResourcePattern} and * {@link UriPostTransformation#UriPostTransformation} perform consistency * checks, {@link Csv2RdfException} can be the cause of * {@link JsonMappingException}, too. * * @param e * @return error message */ private String getErrorMessage(JsonMappingException e) { List path = e.getPath(); String message; if (e.getCause() instanceof Csv2RdfException) { message = e.getCause().getMessage(); } else { message = e.getOriginalMessage(); } for (int i = path.size() - 1; i >= 0; --i) { String field = path.get(i).getFieldName(); if (field != null) { return "Loading configuration failed because of invalid input at " + field + ": " + message; } } return "Loading configuration failed because of invalid input: " + message; } }