/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 *
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.opensearch.hadoop.serialization;

import java.util.ArrayList;
import java.util.List;

import org.opensearch.hadoop.serialization.Parser.Token;
import org.opensearch.hadoop.serialization.bulk.RawJson;
import org.opensearch.hadoop.serialization.json.JacksonJsonGenerator;
import org.opensearch.hadoop.util.FastByteArrayOutputStream;
import org.opensearch.hadoop.util.StringUtils;

/**
 * Utilities for navigating and extracting data from a streaming JSON {@link Parser}
 * without materializing the whole document: seeking to a named field, pulling out
 * the values of several dotted paths in one pass, and skipping over blocks.
 */
public abstract class ParsingUtils {

    /** Placeholder stored in the result of {@link #values(Parser, String...)} for paths absent from the document. */
    public static final String NOT_FOUND = "(not found)";

    /**
     * Seeks the field with the given name in the stream and positions (and returns) the parser
     * on the next available token (value or not). Returns null if the field is not found.
     *
     * @param parser parser to advance; consumed up to (and including) the matched field's next token
     * @param path dot-separated field path (e.g. {@code "a.b.c"})
     * @return token associated with the given path or null if not found
     */
    public static Token seek(Parser parser, String path) {
        // return current token if no path is given
        if (!StringUtils.hasText(path)) {
            return null;
        }

        List<String> tokens = StringUtils.tokenize(path, ".");
        return seek(parser, tokens.toArray(new String[tokens.size()]));
    }

    public static Token seek(Parser parser, String[] path1) {
        return seek(parser, path1, null);
    }

    /**
     * Seeks whichever of the two given field paths appears first in the stream.
     *
     * @param parser parser to advance
     * @param path1 first path to look for (each element is one nesting level); may be null
     * @param path2 alternative path to look for; may be null
     * @return the token following the matched path, or null if neither path is found
     */
    public static Token seek(Parser parser, String[] path1, String[] path2) {
        return doSeekToken(parser, path1, 0, path2, 0);
    }

    // Recursive worker for seek(): walks object fields at the current level, descending one
    // level (via recursion) whenever a path component matches, and skipping non-matching fields.
    private static Token doSeekToken(Parser parser, String[] path1, int index1, String[] path2, int index2) {
        Token token = null;

        String currentName;
        token = parser.currentToken();
        if (token == null) {
            token = parser.nextToken();
        }

        while ((token = parser.nextToken()) != null) {
            if (token == Token.START_OBJECT) {
                token = parser.nextToken();
            }
            if (token == Token.FIELD_NAME) {
                // found a node, go one level deep
                currentName = parser.currentName();
                if (path1 != null && currentName.equals(path1[index1])) {
                    if (index1 + 1 < path1.length) {
                        // more components left in path1 - descend
                        return doSeekToken(parser, path1, index1 + 1, null, 0);
                    }
                    else {
                        // full match - position the parser on the field's value token
                        return parser.nextToken();
                    }
                }
                else if (path2 != null && currentName.equals(path2[index2])) {
                    if (index2 + 1 < path2.length) {
                        return doSeekToken(parser, null, 0, path2, index2 + 1);
                    }
                    else {
                        return parser.nextToken();
                    }
                }
                else {
                    // get field token (can be value, object or array) and skip it entirely
                    parser.nextToken();
                    parser.skipChildren();
                }
            }
            else {
                break;
            }
        }

        return null;
    }

    /**
     * Tracks one dot-separated path during the single-pass walk done by
     * {@link #doFind(Parser, List, int, int)} and records its value once matched.
     */
    private static class Matcher {
        private final List<String> tokens;
        private final String path;
        private boolean matched = false;
        private Object value;

        Matcher(String path) {
            this.path = path;
            tokens = StringUtils.tokenize(path, ".");
        }

        // number of levels required for the matcher
        int nesting() {
            return tokens.size() - 1;
        }

        // does the path component at the given depth equal the given field name?
        boolean matches(String key, int level) {
            if (level < tokens.size()) {
                return tokens.get(level).equals(key);
            }
            return false;
        }

        void value(Object value) {
            matched = true;
            this.value = value;
        }

        @Override
        public String toString() {
            return path;
        }
    }

    /**
     * Extracts the values of the given dotted field paths in a single pass over the stream.
     * Scalar values are returned as their Java equivalents; objects (and arrays reached as a
     * full match) are captured verbatim as {@link RawJson}.
     *
     * @param parser parser positioned at (or before) the document's root object
     * @param paths dot-separated field paths to look up
     * @return one entry per path, in input order; paths not found map to {@link #NOT_FOUND}
     */
    public static List<Object> values(Parser parser, String... paths) {
        List<Matcher> matchers = new ArrayList<Matcher>(paths.length);

        // the deepest path determines how far doFind may recurse
        int maxNesting = 0;
        for (String path : paths) {
            Matcher matcher = new Matcher(path);
            matchers.add(matcher);
            if (matcher.nesting() > maxNesting) {
                maxNesting = matcher.nesting();
            }
        }

        doFind(parser, matchers, 0, maxNesting);

        List<Object> matches = new ArrayList<Object>();
        for (Matcher matcher : matchers) {
            matches.add(matcher.matched ? matcher.value : NOT_FOUND);
        }
        return matches;
    }

    // Recursive worker for values(): walks the fields of the current object, resolving fully
    // matched paths in place and carrying partially matched ones one level deeper.
    private static void doFind(Parser parser, List<Matcher> currentMatchers, int level, int maxNesting) {
        Token token = parser.currentToken();
        if (token == null) {
            // advance to the initial START_OBJECT token
            parser.nextToken();
        }

        while ((token = parser.nextToken()) != null && token != Token.END_OBJECT) {
            if (token == Token.FIELD_NAME) {
                String currentName = parser.currentName();

                Object value = null;
                // value is read at most once even if several paths match this field
                boolean valueRead = false;
                List<Matcher> nextLevel = null;

                for (Matcher matcher : currentMatchers) {
                    if (matcher.matches(currentName, level)) {
                        // found a match
                        if (matcher.nesting() == level) {
                            if (!valueRead) {
                                valueRead = true;
                                switch (parser.nextToken()) {
                                case VALUE_NUMBER:
                                    value = parser.numberValue();
                                    break;
                                case VALUE_BOOLEAN:
                                    value = Boolean.valueOf(parser.booleanValue());
                                    break;
                                case VALUE_NULL:
                                    value = null;
                                    break;
                                case VALUE_STRING:
                                    value = parser.text();
                                    break;
                                default:
                                    // non-scalar (object/array) - capture it verbatim
                                    value = new RawJson(readValueAsString(parser));
                                }
                            }
                            matcher.value(value);
                        }
                        // partial match - keep it for the next level
                        else {
                            if (nextLevel == null) {
                                nextLevel = new ArrayList<Matcher>(currentMatchers.size());
                            }
                            nextLevel.add(matcher);
                        }
                    }
                }

                if (!valueRead) {
                    // must parse or skip the value
                    switch (parser.nextToken()) {
                    case START_OBJECT:
                        if (level < maxNesting && nextLevel != null) {
                            doFind(parser, nextLevel, level + 1, maxNesting);
                        }
                        else {
                            parser.skipChildren();
                        }
                        break;
                    case START_ARRAY:
                        // arrays are not handled; simply ignore
                        parser.skipChildren();
                        break;
                    }
                }
            }
        }
    }

    // Serializes the value the parser is currently positioned on back into its JSON string form,
    // consuming it from the stream in the process.
    private static String readValueAsString(Parser parser) {
        FastByteArrayOutputStream out = new FastByteArrayOutputStream(256);
        JacksonJsonGenerator generator = new JacksonJsonGenerator(out);
        traverse(parser, generator);
        generator.close();
        return out.toString();
    }

    // Copies the current token (and, for containers, all of its children) from parser to
    // generator, leaving the parser positioned after the copied value.
    private static void traverse(Parser parser, Generator generator) {
        Token t = parser.currentToken();

        switch (t) {
        case START_OBJECT:
            traverseMap(parser, generator);
            break;
        case START_ARRAY:
            traverseArray(parser, generator);
            break;
        case FIELD_NAME:
            generator.writeFieldName(parser.currentName());
            parser.nextToken();
            traverse(parser, generator);
            break;
        case VALUE_STRING:
            generator.writeString(parser.text());
            parser.nextToken();
            break;
        case VALUE_BOOLEAN:
            generator.writeBoolean(parser.booleanValue());
            parser.nextToken();
            break;
        case VALUE_NULL:
            generator.writeNull();
            parser.nextToken();
            break;
        case VALUE_NUMBER:
            // preserve the original numeric type when round-tripping
            switch (parser.numberType()) {
            case INT:
                generator.writeNumber(parser.intValue());
                break;
            case LONG:
                generator.writeNumber(parser.longValue());
                break;
            case DOUBLE:
                generator.writeNumber(parser.doubleValue());
                break;
            case FLOAT:
                generator.writeNumber(parser.floatValue());
                break;
            }
            parser.nextToken();
            break;
        }
    }

    // Copies an entire JSON object from parser to generator.
    private static void traverseMap(Parser parser, Generator generator) {
        generator.writeBeginObject();
        parser.nextToken();
        for (; parser.currentToken() != Token.END_OBJECT;) {
            traverse(parser, generator);
        }
        generator.writeEndObject();
        parser.nextToken();
    }

    // Copies an entire JSON array from parser to generator.
    private static void traverseArray(Parser parser, Generator generator) {
        generator.writeBeginArray();
        parser.nextToken();
        for (; parser.currentToken() != Token.END_ARRAY;) {
            traverse(parser, generator);
        }
        generator.writeEndArray();
        parser.nextToken();
    }

    /**
     * Skips the remainder of the block (object or array) the parser is currently inside,
     * consuming tokens until the matching END_OBJECT/END_ARRAY is reached (balanced nesting
     * is tracked so inner blocks are skipped whole).
     *
     * @param parser parser positioned inside the block to skip
     */
    public static void skipCurrentBlock(Parser parser) {
        int open = 1;

        while (true) {
            Token t = parser.nextToken();
            if (t == null) {
                // NOTE(review): EOF before the block closed - silently returning; confirm
                // callers are fine with a truncated stream being treated as end-of-block
                return;
            }
            switch (t) {
            case START_OBJECT:
            case START_ARRAY:
                ++open;
                break;
            case END_OBJECT:
            case END_ARRAY:
                if (--open == 0) {
                    return;
                }
                break;
            }
        }
    }
}