/* * SPDX-License-Identifier: Apache-2.0 * * The OpenSearch Contributors require contributions made to * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ /* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ /* * Modifications Copyright OpenSearch Contributors. See * GitHub history for details. 
*/

package org.opensearch.ingest.useragent;

import org.opensearch.OpenSearchParseException;
import org.opensearch.common.xcontent.LoggingDeprecationHandler;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.common.xcontent.XContentType;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Matches user-agent strings against regular-expression patterns loaded from a YAML definition stream.
 * <p>
 * The stream is expected to hold a top-level object whose {@code user_agent_parsers}, {@code os_parsers}
 * and {@code device_parsers} fields each contain an array of pattern definitions. Results of
 * {@link #parse(String)} are stored in, and served from, the {@link UserAgentCache} given at construction.
 */
final class UserAgentParser {

    private final UserAgentCache cache;
    private final List<UserAgentSubpattern> uaPatterns = new ArrayList<>();
    private final List<UserAgentSubpattern> osPatterns = new ArrayList<>();
    private final List<UserAgentSubpattern> devicePatterns = new ArrayList<>();
    private final String name;

    /**
     * Builds a parser by reading all pattern definitions from {@code regexStream}.
     *
     * @param name        identifier of this parser; also used as part of the cache key
     * @param regexStream YAML stream containing the pattern definitions
     * @param cache       cache consulted and populated by {@link #parse(String)}
     * @throws OpenSearchParseException if the stream cannot be read or contains no usable pattern
     */
    UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) {
        this.name = name;
        this.cache = cache;

        try {
            init(regexStream);
        } catch (IOException e) {
            throw new OpenSearchParseException("error parsing regular expression file", e);
        }
    }

    /**
     * Reads the YAML definition and fills the three pattern lists.
     *
     * @param regexStream YAML stream containing the pattern definitions
     * @throws IOException              if the underlying parser fails to read the stream
     * @throws OpenSearchParseException if no pattern of any kind was found
     */
    private void init(InputStream regexStream) throws IOException {
        // EMPTY is safe here because we don't use namedObject.
        // The parser is a Closeable: try-with-resources releases it on both the normal and the error path.
        try (
            XContentParser yamlParser = XContentType.YAML.xContent()
                .createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, regexStream)
        ) {
            XContentParser.Token token = yamlParser.nextToken();

            if (token == XContentParser.Token.START_OBJECT) {
                token = yamlParser.nextToken();

                for (; token != null; token = yamlParser.nextToken()) {
                    if (token != XContentParser.Token.FIELD_NAME) {
                        continue;
                    }

                    switch (yamlParser.currentName()) {
                        case "user_agent_parsers":
                            for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                                uaPatterns.add(
                                    new UserAgentSubpattern(
                                        compilePattern(map.get("regex"), map.get("regex_flag")),
                                        map.get("family_replacement"),
                                        map.get("v1_replacement"),
                                        map.get("v2_replacement"),
                                        map.get("v3_replacement"),
                                        map.get("v4_replacement")
                                    )
                                );
                            }
                            break;
                        case "os_parsers":
                            for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                                osPatterns.add(
                                    new UserAgentSubpattern(
                                        compilePattern(map.get("regex"), map.get("regex_flag")),
                                        map.get("os_replacement"),
                                        map.get("os_v1_replacement"),
                                        map.get("os_v2_replacement"),
                                        map.get("os_v3_replacement"),
                                        map.get("os_v4_replacement")
                                    )
                                );
                            }
                            break;
                        case "device_parsers":
                            for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                                devicePatterns.add(
                                    new UserAgentSubpattern(
                                        compilePattern(map.get("regex"), map.get("regex_flag")),
                                        map.get("device_replacement"),
                                        null,
                                        null,
                                        null,
                                        null
                                    )
                                );
                            }
                            break;
                        default:
                            // Unknown field names are ignored; iteration simply continues with the next token.
                            break;
                    }
                }
            }
        }

        if (uaPatterns.isEmpty() && osPatterns.isEmpty() && devicePatterns.isEmpty()) {
            throw new OpenSearchParseException("not a valid regular expression file");
        }
    }

    /**
     * Compiles a pattern definition.
     *
     * @param regex     the regular expression source
     * @param regexFlag optional flag; {@code "i"} selects case-insensitive matching, anything else is ignored
     * @return the compiled pattern
     */
    private Pattern compilePattern(String regex, String regexFlag) {
        // Only flag present in the current default regexes.yaml
        if ("i".equals(regexFlag)) {
            return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        }
        return Pattern.compile(regex);
    }

    /**
     * Reads an array of flat objects from the parser, turning each object into a field-name to text map.
     * The parser must be positioned on the field name that precedes the array.
     *
     * @param yamlParser parser positioned just before the array
     * @return one map per object in the array, in document order
     * @throws IOException              if the underlying parser fails
     * @throws OpenSearchParseException if the token sequence is not an array of non-empty objects
     */
    private List<Map<String, String>> readParserConfigurations(XContentParser yamlParser) throws IOException {
        List<Map<String, String>> patternList = new ArrayList<>();

        XContentParser.Token token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_ARRAY) {
            throw new OpenSearchParseException("malformed regular expression file, should continue with 'array' after 'object'");
        }

        token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_OBJECT) {
            throw new OpenSearchParseException("malformed regular expression file, expecting 'object'");
        }

        while (token == XContentParser.Token.START_OBJECT) {
            token = yamlParser.nextToken();

            if (token != XContentParser.Token.FIELD_NAME) {
                throw new OpenSearchParseException("malformed regular expression file, should continue with 'field_name' after 'array'");
            }

            Map<String, String> regexMap = new HashMap<>();
            for (; token == XContentParser.Token.FIELD_NAME; token = yamlParser.nextToken()) {
                String fieldName = yamlParser.currentName();

                // Advance from the field name to its value before reading the text.
                yamlParser.nextToken();
                String fieldValue = yamlParser.text();
                regexMap.put(fieldName, fieldValue);
            }

            patternList.add(regexMap);

            // Step past the token that ended the field loop; the outer loop continues only on another object.
            token = yamlParser.nextToken();
        }

        return patternList;
    }

    /** Returns the internal list of user-agent patterns (not a copy). */
    List<UserAgentSubpattern> getUaPatterns() {
        return uaPatterns;
    }

    /** Returns the internal list of operating-system patterns (not a copy). */
    List<UserAgentSubpattern> getOsPatterns() {
        return osPatterns;
    }

    /** Returns the internal list of device patterns (not a copy). */
    List<UserAgentSubpattern> getDevicePatterns() {
        return devicePatterns;
    }

    /** Returns the name given to this parser at construction. */
    String getName() {
        return name;
    }

    /**
     * Resolves user agent, operating system and device for the given string.
     * A cached result for this parser name and agent string is returned when present; otherwise the
     * pattern lists are searched and the outcome is stored in the cache.
     *
     * @param agentString the raw user-agent string
     * @return the details; any of its fields is {@code null} when no pattern of that kind matched
     */
    public Details parse(String agentString) {
        Details details = cache.get(name, agentString);

        if (details == null) {
            VersionedName userAgent = findMatch(uaPatterns, agentString);
            VersionedName operatingSystem = findMatch(osPatterns, agentString);
            VersionedName device = findMatch(devicePatterns, agentString);

            details = new Details(userAgent, operatingSystem, device);

            cache.put(name, agentString, details);
        }

        return details;
    }

    /**
     * Returns the result of the first pattern, in list order, that yields a non-null match.
     *
     * @param possiblePatterns patterns to try in order
     * @param agentString      the raw user-agent string
     * @return the first non-null match, or {@code null} if none of the patterns produced one
     */
    private VersionedName findMatch(List<UserAgentSubpattern> possiblePatterns, String agentString) {
        for (UserAgentSubpattern pattern : possiblePatterns) {
            VersionedName match = pattern.match(agentString);
            if (match != null) {
                return match;
            }
        }
        return null;
    }

    /** Result holder for one parsed user-agent string. */
    static final class Details {
        public final VersionedName userAgent;
        public final VersionedName operatingSystem;
        public final VersionedName device;

        Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) {
            this.userAgent = userAgent;
            this.operatingSystem = operatingSystem;
            this.device = device;
        }
    }

    /** A name together with up to four version components. */
    static final class VersionedName {
        public final String name;
        public final String major;
        public final String minor;
        public final String patch;
        public final String build;

        VersionedName(String name, String major, String minor, String patch, String build) {
            this.name = name;
            this.major = major;
            this.minor = minor;
            this.patch = patch;
            this.build = build;
        }
    }

    /**
     * One of: user agent, operating system, device
     */
    static final class UserAgentSubpattern {
        private final Pattern pattern;
        private final String nameReplacement, v1Replacement, v2Replacement, v3Replacement, v4Replacement;

        UserAgentSubpattern(
            Pattern pattern,
            String nameReplacement,
            String v1Replacement,
            String v2Replacement,
            String v3Replacement,
            String v4Replacement
        ) {
            this.pattern = pattern;
            this.nameReplacement = nameReplacement;
            this.v1Replacement = v1Replacement;
            this.v2Replacement = v2Replacement;
            this.v3Replacement = v3Replacement;
            this.v4Replacement = v4Replacement;
        }

        /**
         * Applies this pattern to the agent string.
         * <p>
         * The name is the name replacement (with its first {@code $1} substituted by capture group 1 when
         * that group exists and participated in the match), or capture group 1 when no replacement is
         * configured. Each version component is its configured replacement, or else capture groups 2 to 5
         * respectively when the pattern declares them.
         *
         * @param agentString the raw user-agent string
         * @return the extracted name and version, or {@code null} if the pattern is not found in the
         *         string or no name could be determined
         */
        public VersionedName match(String agentString) {
            Matcher matcher = pattern.matcher(agentString);
            if (!matcher.find()) {
                return null;
            }

            int groupCount = matcher.groupCount();

            String name = null;
            if (nameReplacement != null) {
                if (nameReplacement.contains("$1") && groupCount >= 1 && matcher.group(1) != null) {
                    // quoteReplacement keeps '$' and '\' in the captured text from being read as group references.
                    name = nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1)));
                } else {
                    name = nameReplacement;
                }
            } else if (groupCount >= 1) {
                name = matcher.group(1);
            }

            String major = replacementOrGroup(v1Replacement, matcher, 2);
            String minor = replacementOrGroup(v2Replacement, matcher, 3);
            String patch = replacementOrGroup(v3Replacement, matcher, 4);
            String build = replacementOrGroup(v4Replacement, matcher, 5);

            return name == null ? null : new VersionedName(name, major, minor, patch, build);
        }

        /**
         * Returns the replacement if configured, otherwise the given capture group when the pattern
         * declares at least that many groups, otherwise {@code null}.
         */
        private static String replacementOrGroup(String replacement, Matcher matcher, int group) {
            if (replacement != null) {
                return replacement;
            }
            return matcher.groupCount() >= group ? matcher.group(group) : null;
        }
    }
}