/* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
namespace OpenSearch.Client
{
/// <summary>
/// Reads Well-Known Text (WKT) into <see cref="IGeoShape" /> types
/// </summary>
public class GeoWKTReader
{
    /// <summary>
    /// Reads Well-Known Text (WKT) into a new instance of <see cref="IGeoShape" />
    /// </summary>
    /// <param name="wellKnownText">The WKT representation of a single geometry.</param>
    /// <returns>
    /// The parsed shape with its format set to <see cref="GeoFormat.WellKnownText" />,
    /// or <c>null</c> when the geometry is declared as <c>EMPTY</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="wellKnownText" /> is <c>null</c>.</exception>
    /// <exception cref="GeoWKTException">The text is not a geometry this reader understands.</exception>
    public static IGeoShape Read(string wellKnownText)
    {
        // StringReader would throw the same exception type, but naming its own parameter.
        if (wellKnownText == null)
            throw new ArgumentNullException(nameof(wellKnownText));

        using (var tokenizer = new WellKnownTextTokenizer(new StringReader(wellKnownText)))
            return Read(tokenizer, null);
    }

    /// <summary>
    /// Reads one geometry (type keyword followed by its body) from the tokenizer.
    /// </summary>
    /// <param name="tokenizer">The tokenizer positioned before the geometry type keyword.</param>
    /// <param name="shapeType">
    /// When not <c>null</c> and not <see cref="GeoShapeType.GeometryCollection" />, the
    /// geometry type that must be found; any other type raises a <see cref="GeoWKTException" />.
    /// </param>
    /// <returns>The parsed shape, or <c>null</c> when the geometry body is <c>EMPTY</c>.</returns>
    private static IGeoShape Read(WellKnownTextTokenizer tokenizer, string shapeType)
    {
        var token = tokenizer.NextToken();

        if (token != TokenType.Word)
            throw new GeoWKTException(
                $"Expected word but found {tokenizer.TokenString()}", tokenizer.LineNumber, tokenizer.Position);

        // geometry type keywords are matched case-insensitively
        var type = tokenizer.TokenValue.ToUpperInvariant();

        if (shapeType != null && shapeType != GeoShapeType.GeometryCollection && type != shapeType)
            throw new GeoWKTException($"Expected geometry type {shapeType} but found {type}");

        // Every ParseX method returns null for an EMPTY geometry, so the format
        // may only be assigned when a shape was actually produced.
        switch (type)
        {
            case GeoShapeType.Point:
                var point = ParsePoint(tokenizer);
                if (point != null)
                    point.Format = GeoFormat.WellKnownText;
                return point;
            case GeoShapeType.MultiPoint:
                var multiPoint = ParseMultiPoint(tokenizer);
                if (multiPoint != null)
                    multiPoint.Format = GeoFormat.WellKnownText;
                return multiPoint;
            case GeoShapeType.LineString:
                var lineString = ParseLineString(tokenizer);
                if (lineString != null)
                    lineString.Format = GeoFormat.WellKnownText;
                return lineString;
            case GeoShapeType.MultiLineString:
                var multiLineString = ParseMultiLineString(tokenizer);
                if (multiLineString != null)
                    multiLineString.Format = GeoFormat.WellKnownText;
                return multiLineString;
            case GeoShapeType.Polygon:
                var polygon = ParsePolygon(tokenizer);
                if (polygon != null)
                    polygon.Format = GeoFormat.WellKnownText;
                return polygon;
            case GeoShapeType.MultiPolygon:
                var multiPolygon = ParseMultiPolygon(tokenizer);
                if (multiPolygon != null)
                    multiPolygon.Format = GeoFormat.WellKnownText;
                return multiPolygon;
            case GeoShapeType.BoundingBox:
                var envelope = ParseBoundingBox(tokenizer);
                if (envelope != null)
                    envelope.Format = GeoFormat.WellKnownText;
                return envelope;
            case GeoShapeType.GeometryCollection:
                var geometryCollection = ParseGeometryCollection(tokenizer);
                if (geometryCollection != null)
                    geometryCollection.Format = GeoFormat.WellKnownText;
                return geometryCollection;
            default:
                throw new GeoWKTException($"Unknown geometry type: {type}");
        }
    }

    /// <summary>
    /// Parses the body of a POINT: <c>EMPTY</c> or <c>( coordinate )</c>.
    /// </summary>
    /// <returns>The point, or <c>null</c> for <c>EMPTY</c>.</returns>
    private static PointGeoShape ParsePoint(WellKnownTextTokenizer tokenizer)
    {
        // NextEmptyOrOpen only ever returns Word for the EMPTY keyword
        if (NextEmptyOrOpen(tokenizer) == TokenType.Word)
            return null;

        var point = new PointGeoShape(ParseCoordinate(tokenizer));
        NextCloser(tokenizer);

        return point;
    }

    /// <summary>
    /// Parses the body of a MULTIPOINT: <c>EMPTY</c> or a parenthesised coordinate list.
    /// </summary>
    /// <returns>The multi point, or <c>null</c> for <c>EMPTY</c>.</returns>
    private static MultiPointGeoShape ParseMultiPoint(WellKnownTextTokenizer tokenizer)
    {
        if (NextEmptyOrOpen(tokenizer) == TokenType.Word)
            return null;

        var coordinates = ParseCoordinates(tokenizer);
        return new MultiPointGeoShape(coordinates);
    }

    /// <summary>
    /// Parses the body of a LINESTRING: <c>EMPTY</c> or a parenthesised coordinate list.
    /// </summary>
    /// <returns>The line string, or <c>null</c> for <c>EMPTY</c>.</returns>
    private static LineStringGeoShape ParseLineString(WellKnownTextTokenizer tokenizer)
    {
        if (NextEmptyOrOpen(tokenizer) == TokenType.Word)
            return null;

        var coordinates = ParseCoordinates(tokenizer);
        return new LineStringGeoShape(coordinates);
    }

    /// <summary>
    /// Parses the body of a MULTILINESTRING: <c>EMPTY</c> or a parenthesised list of coordinate lists.
    /// </summary>
    /// <returns>The multi line string, or <c>null</c> for <c>EMPTY</c>.</returns>
    private static MultiLineStringGeoShape ParseMultiLineString(WellKnownTextTokenizer tokenizer)
    {
        if (NextEmptyOrOpen(tokenizer) == TokenType.Word)
            return null;

        var coordinates = ParseCoordinateLists(tokenizer);
        return new MultiLineStringGeoShape(coordinates);
    }

    /// <summary>
    /// Parses the body of a POLYGON: <c>EMPTY</c> or a parenthesised list of coordinate lists.
    /// </summary>
    /// <returns>The polygon, or <c>null</c> for <c>EMPTY</c>.</returns>
    private static PolygonGeoShape ParsePolygon(WellKnownTextTokenizer tokenizer)
    {
        if (NextEmptyOrOpen(tokenizer) == TokenType.Word)
            return null;

        var coordinates = ParseCoordinateLists(tokenizer);
        return new PolygonGeoShape(coordinates);
    }

    /// <summary>
    /// Parses the body of a MULTIPOLYGON: <c>EMPTY</c> or a parenthesised, comma separated
    /// sequence of polygon bodies.
    /// </summary>
    /// <returns>The multi polygon, or <c>null</c> for <c>EMPTY</c>.</returns>
    private static MultiPolygonGeoShape ParseMultiPolygon(WellKnownTextTokenizer tokenizer)
    {
        if (NextEmptyOrOpen(tokenizer) == TokenType.Word)
            return null;

        // NOTE(review): the element type is reconstructed from ParseCoordinateLists' return type.
        var coordinates = new List<IEnumerable<IEnumerable<GeoCoordinate>>>
        {
            ParseCoordinateLists(tokenizer)
        };

        while (NextCloserOrComma(tokenizer) == TokenType.Comma)
            coordinates.Add(ParseCoordinateLists(tokenizer));

        return new MultiPolygonGeoShape(coordinates);
    }

    /// <summary>
    /// Parses the body of a BBOX: <c>EMPTY</c> or four comma separated numbers read in the
    /// order minLon, maxLon, maxLat, minLat.
    /// </summary>
    /// <returns>
    /// An envelope built from the coordinates (maxLat, minLon) and (minLat, maxLon),
    /// or <c>null</c> for <c>EMPTY</c>.
    /// </returns>
    private static EnvelopeGeoShape ParseBoundingBox(WellKnownTextTokenizer tokenizer)
    {
        if (NextEmptyOrOpen(tokenizer) == TokenType.Word)
            return null;

        var minLon = NextNumber(tokenizer);
        NextComma(tokenizer);
        var maxLon = NextNumber(tokenizer);
        NextComma(tokenizer);
        var maxLat = NextNumber(tokenizer);
        NextComma(tokenizer);
        var minLat = NextNumber(tokenizer);
        NextCloser(tokenizer);

        return new EnvelopeGeoShape(new[] { new GeoCoordinate(maxLat, minLon), new GeoCoordinate(minLat, maxLon) });
    }

    /// <summary>
    /// Parses the body of a GEOMETRYCOLLECTION: <c>EMPTY</c> or a parenthesised, comma
    /// separated sequence of geometries.
    /// </summary>
    /// <returns>
    /// The collection, or <c>null</c> for <c>EMPTY</c>. Member geometries that are themselves
    /// <c>EMPTY</c> (and therefore parse to <c>null</c>) are left out of the collection.
    /// </returns>
    private static GeometryCollection ParseGeometryCollection(WellKnownTextTokenizer tokenizer)
    {
        if (NextEmptyOrOpen(tokenizer) == TokenType.Word)
            return null;

        var geometries = new List<IGeoShape>();

        var geometry = Read(tokenizer, GeoShapeType.GeometryCollection);
        if (geometry != null)
            geometries.Add(geometry);

        while (NextCloserOrComma(tokenizer) == TokenType.Comma)
        {
            geometry = Read(tokenizer, null);
            if (geometry != null)
                geometries.Add(geometry);
        }

        return new GeometryCollection { Geometries = geometries };
    }

    /// <summary>
    /// Parses a comma separated sequence of parenthesised coordinate lists, up to and
    /// including the closing parenthesis of the sequence. The opening parenthesis of the
    /// sequence must already have been consumed by the caller.
    /// </summary>
    private static List<IEnumerable<GeoCoordinate>> ParseCoordinateLists(WellKnownTextTokenizer tokenizer)
    {
        // NOTE(review): the element type is reconstructed from ParseCoordinates' return type.
        var coordinates = new List<IEnumerable<GeoCoordinate>>();

        NextEmptyOrOpen(tokenizer);
        coordinates.Add(ParseCoordinates(tokenizer));

        while (NextCloserOrComma(tokenizer) == TokenType.Comma)
        {
            NextEmptyOrOpen(tokenizer);
            coordinates.Add(ParseCoordinates(tokenizer));
        }

        return coordinates;
    }

    /// <summary>
    /// Parses a comma separated sequence of coordinates, up to and including the closing
    /// parenthesis of the sequence. Each coordinate may optionally be wrapped in its own
    /// parentheses, e.g. <c>(1 2), (3 4)</c> as well as <c>1 2, 3 4</c>.
    /// </summary>
    private static List<GeoCoordinate> ParseCoordinates(WellKnownTextTokenizer tokenizer)
    {
        var coordinates = new List<GeoCoordinate>();

        do
        {
            var isOpenParen = false;

            if (IsNumberNext(tokenizer) || (isOpenParen = tokenizer.NextToken() == TokenType.LParen))
                coordinates.Add(ParseCoordinate(tokenizer));

            // A coordinate wrapped in its own parentheses must have that pair closed
            // before the separator is read. This applies to the first coordinate too,
            // otherwise its ")" is mistaken for the end of the whole list.
            if (isOpenParen)
                NextCloser(tokenizer);
        } while (NextCloserOrComma(tokenizer) == TokenType.Comma);

        return coordinates;
    }

    /// <summary>
    /// Parses a single coordinate: longitude, then latitude, then an optional third number.
    /// </summary>
    /// <returns>
    /// A coordinate constructed with the latitude first and the longitude second, plus the
    /// third number when one is present.
    /// </returns>
    private static GeoCoordinate ParseCoordinate(WellKnownTextTokenizer tokenizer)
    {
        // WKT lists the longitude before the latitude
        var lon = NextNumber(tokenizer);
        var lat = NextNumber(tokenizer);

        if (IsNumberNext(tokenizer))
            return new GeoCoordinate(lat, lon, NextNumber(tokenizer));

        return new GeoCoordinate(lat, lon);
    }

    /// <summary>
    /// Consumes the next token and requires it to be a closing parenthesis.
    /// </summary>
    /// <exception cref="GeoWKTException">The next token is not a closing parenthesis.</exception>
    internal static void NextCloser(WellKnownTextTokenizer tokenizer)
    {
        if (tokenizer.NextToken() != TokenType.RParen)
            throw new GeoWKTException(
                $"Expected {(char)WellKnownTextTokenizer.RParen} " +
                $"but found: {tokenizer.TokenString()}", tokenizer.LineNumber, tokenizer.Position);
    }

    /// <summary>
    /// Consumes the next token and requires it to be a comma.
    /// </summary>
    /// <exception cref="GeoWKTException">The next token is not a comma.</exception>
    private static void NextComma(WellKnownTextTokenizer tokenizer)
    {
        if (tokenizer.NextToken() != TokenType.Comma)
            throw new GeoWKTException(
                $"Expected {(char)WellKnownTextTokenizer.Comma} but found: {tokenizer.TokenString()}",
                tokenizer.LineNumber,
                tokenizer.Position);
    }

    /// <summary>
    /// Consumes the next token and requires it to be either an opening parenthesis or the
    /// <c>EMPTY</c> keyword (matched case-insensitively).
    /// </summary>
    /// <returns>
    /// <see cref="TokenType.LParen" /> for an opening parenthesis,
    /// <see cref="TokenType.Word" /> for <c>EMPTY</c>.
    /// </returns>
    /// <exception cref="GeoWKTException">The next token is neither of the two.</exception>
    internal static TokenType NextEmptyOrOpen(WellKnownTextTokenizer tokenizer)
    {
        var token = tokenizer.NextToken();

        if (token == TokenType.LParen)
            return token;

        if (token == TokenType.Word
            && tokenizer.TokenValue.Equals(WellKnownTextTokenizer.Empty, StringComparison.OrdinalIgnoreCase))
            return token;

        throw new GeoWKTException(
            $"Expected {WellKnownTextTokenizer.Empty} or {(char)WellKnownTextTokenizer.LParen} " +
            $"but found: {tokenizer.TokenString()}", tokenizer.LineNumber, tokenizer.Position);
    }

    /// <summary>
    /// Consumes the next token and requires it to be a comma or a closing parenthesis.
    /// </summary>
    /// <returns>The token type that was found.</returns>
    /// <exception cref="GeoWKTException">The next token is neither of the two.</exception>
    private static TokenType NextCloserOrComma(WellKnownTextTokenizer tokenizer)
    {
        var token = tokenizer.NextToken();

        if (token == TokenType.Comma || token == TokenType.RParen)
            return token;

        throw new GeoWKTException(
            $"Expected {(char)WellKnownTextTokenizer.Comma} or {(char)WellKnownTextTokenizer.RParen} " +
            $"but found: {tokenizer.TokenString()}", tokenizer.LineNumber, tokenizer.Position);
    }

    /// <summary>
    /// Consumes the next token and interprets it as a number.
    /// </summary>
    /// <returns>
    /// <see cref="double.NaN" /> for the <c>NAN</c> keyword (matched case-insensitively),
    /// otherwise the value parsed with the invariant culture.
    /// </returns>
    /// <exception cref="GeoWKTException">The next token is not a word that parses as a number.</exception>
    internal static double NextNumber(WellKnownTextTokenizer tokenizer)
    {
        if (tokenizer.NextToken() == TokenType.Word)
        {
            if (string.Equals(tokenizer.TokenValue, WellKnownTextTokenizer.NaN, StringComparison.OrdinalIgnoreCase))
                return double.NaN;

            if (double.TryParse(
                tokenizer.TokenValue,
                NumberStyles.AllowDecimalPoint | NumberStyles.AllowLeadingSign | NumberStyles.AllowExponent,
                CultureInfo.InvariantCulture, out var d))
                return d;
        }

        throw new GeoWKTException(
            $"Expected number but found: {tokenizer.TokenString()}", tokenizer.LineNumber, tokenizer.Position);
    }

    /// <summary>
    /// Peeks at the next token and reports whether it is a word. The word is not validated
    /// as a number here; <see cref="NextNumber" /> does that when the token is consumed.
    /// </summary>
    internal static bool IsNumberNext(WellKnownTextTokenizer tokenizer) =>
        tokenizer.PeekToken() == TokenType.Word;
}
/// <summary>
/// Classification of a single character while tokenizing Well-Known Text
/// </summary>
internal enum CharacterType : byte
{
    /// <summary>
    /// A character that separates tokens. This is the zero value, so it is also
    /// what an uninitialised <see cref="CharacterType" /> slot holds.
    /// </summary>
    Whitespace = 0,

    /// <summary>
    /// A character that can be part of a word token
    /// </summary>
    Alpha = 1,

    /// <summary>
    /// The character that starts a comment
    /// </summary>
    Comment = 2
}
/// <summary>
/// The kinds of token produced when tokenizing Well-Known Text
/// </summary>
internal enum TokenType : byte
{
    /// <summary>
    /// No token; this is the zero value
    /// </summary>
    None = 0,

    /// <summary>
    /// A run of word characters
    /// </summary>
    Word = 1,

    /// <summary>
    /// An opening parenthesis
    /// </summary>
    LParen = 2,

    /// <summary>
    /// A closing parenthesis
    /// </summary>
    RParen = 3,

    /// <summary>
    /// A comma
    /// </summary>
    Comma = 4
}
/// <summary>
/// Tokenizes a sequence of characters into Well-Known Text
/// (WKT) <see cref="TokenType" /> tokens
/// </summary>
internal class WellKnownTextTokenizer : IDisposable
{
    public const int CarriageReturn = '\r';
    private const int CharacterTypesLength = 256;
    public const int Comma = ',';
    public const int Comment = '#';
    public const int Dot = '.';
    public const string Empty = "EMPTY";
    public const int Linefeed = '\n';
    public const int LParen = '(';
    public const int Minus = '-';
    public const string NaN = "NAN";

    // sentinel for "no character is buffered, read one from the reader"
    private const int NeedChar = int.MaxValue;
    public const int Plus = '+';
    public const int RParen = ')';

    // lookup table from character code (0-255) to its classification
    private static readonly CharacterType[] CharacterTypes = new CharacterType[CharacterTypesLength];

    // scratch space for the characters of the word token currently being read
    private readonly List<char> _buffer = new List<char>();
    private readonly TextReader _reader;

    // the character read past the end of the previous token, or NeedChar
    private int _peekChar = NeedChar;

    // true when the current token was produced by PeekToken and not yet consumed
    private bool _pushed;

    static WellKnownTextTokenizer()
    {
        // build a map of ASCII chars and their types
        // Any unmapped ASCII will be considered whitespace
        // and anything > 0 outside of ASCII will be considered alpha.
        Chars('a', 'z', CharacterType.Alpha);
        Chars('A', 'Z', CharacterType.Alpha);
        Chars(128 + 32, 255, CharacterType.Alpha);
        Chars('0', '9', CharacterType.Alpha);
        Chars(LParen, RParen, CharacterType.Alpha);
        Chars(Plus, Plus, CharacterType.Alpha);
        Chars(Comma, Comma, CharacterType.Alpha);
        Chars(Minus, Dot, CharacterType.Alpha);
        Chars(Comment, Comment, CharacterType.Comment);
    }

    /// <summary>
    /// Creates a tokenizer over the given reader
    /// </summary>
    /// <param name="reader">The source of characters; disposed together with the tokenizer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader" /> is <c>null</c>.</exception>
    public WellKnownTextTokenizer(TextReader reader) =>
        _reader = reader ?? throw new ArgumentNullException(nameof(reader));

    /// <summary>
    /// Gets the current line number
    /// </summary>
    public int LineNumber { get; private set; } = 1;

    /// <summary>
    /// Gets the current position
    /// </summary>
    public int Position { get; private set; }

    /// <summary>
    /// Gets the current token type
    /// </summary>
    public TokenType TokenType { get; private set; } = TokenType.None;

    /// <summary>
    /// Gets the current token value
    /// </summary>
    public string TokenValue { get; private set; }

    /// <summary>
    /// Disposes of the reader from which characters are read
    /// </summary>
    public void Dispose() => _reader?.Dispose();

    /// <summary>
    /// Assigns <paramref name="type" /> to every character code in the inclusive range
    /// <paramref name="low" />..<paramref name="high" />, clamped to the lookup table bounds.
    /// </summary>
    private static void Chars(int low, int high, CharacterType type)
    {
        if (low < 0)
            low = 0;

        if (high >= CharacterTypesLength)
            high = CharacterTypesLength - 1;

        while (low <= high)
            CharacterTypes[low++] = type;
    }

    /// <summary>
    /// Classifies a non-negative character code; codes beyond the lookup table are alpha.
    /// </summary>
    private static CharacterType Classify(int c) =>
        c < CharacterTypesLength
            ? CharacterTypes[c]
            : CharacterType.Alpha;

    /// <summary>
    /// A user friendly string for the current token
    /// </summary>
    public string TokenString()
    {
        switch (TokenType)
        {
            case TokenType.Word:
                return TokenValue;
            case TokenType.None:
                return "END-OF-STREAM";
            case TokenType.LParen:
                return "(";
            case TokenType.RParen:
                return ")";
            case TokenType.Comma:
                return ",";
            default:
                return $"\'{(char)_peekChar}\'";
        }
    }

    /// <summary>
    /// Reads one character from the underlying reader, advancing <see cref="Position" />
    /// </summary>
    private int Read()
    {
        Position++;
        return _reader.Read();
    }

    /// <summary>
    /// Peeks at the next token without changing the state
    /// of the reader
    /// </summary>
    public TokenType PeekToken()
    {
        var position = Position;
        var token = NextToken();

        // restore the position and flag the token so the next NextToken call replays it
        Position = position;
        _pushed = true;
        return token;
    }

    /// <summary>
    /// Gets the next token, advancing the position
    /// </summary>
    /// <returns>
    /// The type of the token that was read; <see cref="TokenType.None" /> at the end of the stream.
    /// For <see cref="TokenType.Word" /> the text is available from <see cref="TokenValue" />.
    /// </returns>
    public TokenType NextToken()
    {
        if (_pushed)
        {
            _pushed = false;

            // Add the length of peeked token
            Position += !string.IsNullOrEmpty(TokenValue)
                ? 1 + TokenValue.Length
                : 1;

            return TokenType;
        }

        TokenValue = null;

        var c = _peekChar;
        if (c < 0)
            c = NeedChar;

        if (c == NeedChar)
        {
            c = Read();
            if (c < 0)
                return TokenType = TokenType.None;
        }

        // reset the peek character for next token
        _peekChar = NeedChar;

        var characterType = Classify(c);

        // consume all whitespace
        while (characterType == CharacterType.Whitespace)
        {
            if (c == CarriageReturn)
            {
                LineNumber++;
                Position = 0;
                c = Read();

                // treat "\r\n" as a single line break
                if (c == Linefeed)
                    c = Read();
            }
            else
            {
                if (c == Linefeed)
                {
                    LineNumber++;
                    Position = 0;
                }

                c = Read();
            }

            if (c < 0)
                return TokenType = TokenType.None;

            characterType = Classify(c);
        }

        // Punctuation is mapped as alpha in the lookup table (so that it is not skipped
        // as whitespace) and therefore has to be recognised before the word branch.
        switch (c)
        {
            case LParen:
                return TokenType = TokenType.LParen;
            case RParen:
                return TokenType = TokenType.RParen;
            case Comma:
                return TokenType = TokenType.Comma;
        }

        if (characterType == CharacterType.Alpha)
        {
            // Start each word from an empty buffer; appending keeps this linear in the
            // word length and stops the buffer from accumulating every previous token.
            _buffer.Clear();

            do
            {
                _buffer.Add((char)c);
                c = Read();

                if (c < 0)
                    characterType = CharacterType.Whitespace;
                else if (c < CharacterTypesLength)
                {
                    // punctuation ends the word even though it is mapped as alpha
                    if (c == LParen || c == RParen || c == Comma)
                        break;

                    characterType = CharacterTypes[c];
                }
                else
                    characterType = CharacterType.Alpha;
            } while (characterType == CharacterType.Alpha);

            // remember the character that ended the word for the next call
            _peekChar = c;
            TokenValue = new string(_buffer.ToArray());
            return TokenType = TokenType.Word;
        }

        if (characterType == CharacterType.Comment)
        {
            // consume all characters on comment line
            while ((c = Read()) != Linefeed && c != CarriageReturn && c >= 0) { }

            _peekChar = c;
            return NextToken();
        }

        return TokenType = TokenType.None;
    }
}
}