// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include #include "arrow/json/rapidjson_defs.h" #include "rapidjson/document.h" #include "rapidjson/prettywriter.h" #include "rapidjson/reader.h" #include "rapidjson/writer.h" #include "arrow/io/memory.h" #include "arrow/json/converter.h" #include "arrow/json/options.h" #include "arrow/json/parser.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" #include "arrow/util/string_view.h" #include "arrow/visitor_inline.h" namespace arrow { namespace json { namespace rj = arrow::rapidjson; using rj::StringBuffer; using util::string_view; using Writer = rj::Writer; inline static Status OK(bool ok) { return ok ? Status::OK() : Status::Invalid(""); } template inline static Status Generate(const std::shared_ptr& type, Engine& e, Writer* writer); template inline static Status Generate(const std::vector>& fields, Engine& e, Writer* writer); template inline static Status Generate(const std::shared_ptr& schm, Engine& e, Writer* writer) { return Generate(schm->fields(), e, writer); } template struct GenerateImpl { Status Visit(const NullType&) { return OK(writer.Null()); } Status Visit(const BooleanType&) { return OK(writer.Bool(std::uniform_int_distribution{}(e)&1)); } template enable_if_physical_unsigned_integer Visit(const T&) { auto val = std::uniform_int_distribution<>{}(e); return OK(writer.Uint64(static_cast(val))); } template enable_if_physical_signed_integer Visit(const T&) { auto val = std::uniform_int_distribution<>{}(e); return OK(writer.Int64(static_cast(val))); } template enable_if_physical_floating_point Visit(const T&) { auto val = std::normal_distribution{0, 1 << 10}(e); return OK(writer.Double(val)); } template enable_if_base_binary Visit(const T&) { auto size = std::poisson_distribution<>{4}(e); std::uniform_int_distribution gen_char(32, 127); // FIXME generate UTF8 std::string s(size, '\0'); for (char& ch : s) ch = static_cast(gen_char(e)); return OK(writer.String(s.c_str())); } template enable_if_list_like Visit(const T& t) { auto size = std::poisson_distribution<>{4}(e); writer.StartArray(); for (int i = 0; i < size; ++i) RETURN_NOT_OK(Generate(t.value_type(), e, &writer)); return OK(writer.EndArray(size)); } Status Visit(const StructType& t) { return Generate(t.fields(), e, &writer); } Status Visit(const DayTimeIntervalType& t) { return NotImplemented(t); } Status Visit(const DictionaryType& t) { return NotImplemented(t); } Status Visit(const ExtensionType& t) { return NotImplemented(t); } Status Visit(const Decimal128Type& t) { return NotImplemented(t); } Status Visit(const FixedSizeBinaryType& t) { return NotImplemented(t); } Status Visit(const UnionType& t) { return NotImplemented(t); } Status NotImplemented(const DataType& t) { return Status::NotImplemented("random generation of arrays of type ", t); } Engine& e; rj::Writer& writer; }; template inline static Status Generate(const std::shared_ptr& type, Engine& e, Writer* writer) { if (std::uniform_real_distribution<>{0, 1}(e) < .2) { // one out of 5 chance of null, anywhere writer->Null(); return Status::OK(); } GenerateImpl visitor = {e, *writer}; return VisitTypeInline(*type, &visitor); } template inline static Status Generate(const std::vector>& fields, Engine& e, Writer* writer) { RETURN_NOT_OK(OK(writer->StartObject())); for (const auto& f : fields) { writer->Key(f->name().c_str()); RETURN_NOT_OK(Generate(f->type(), e, writer)); } return OK(writer->EndObject(static_cast(fields.size()))); } inline static Status MakeStream(string_view src_str, std::shared_ptr* out) { auto src = std::make_shared(src_str); *out = std::make_shared(src); return Status::OK(); } // scalar values (numbers and strings) are parsed into a // dictionary. This can be decoded for ease of comparison inline static Status DecodeStringDictionary(const DictionaryArray& dict_array, std::shared_ptr* decoded) { const StringArray& dict = static_cast(*dict_array.dictionary()); const Int32Array& indices = static_cast(*dict_array.indices()); StringBuilder builder; RETURN_NOT_OK(builder.Resize(indices.length())); for (int64_t i = 0; i < indices.length(); ++i) { if (indices.IsNull(i)) { builder.UnsafeAppendNull(); continue; } auto value = dict.GetView(indices.GetView(i)); RETURN_NOT_OK(builder.ReserveData(value.size())); builder.UnsafeAppend(value); } return builder.Finish(decoded); } inline static Status ParseFromString(ParseOptions options, string_view src_str, std::shared_ptr* parsed) { auto src = std::make_shared(src_str); std::unique_ptr parser; RETURN_NOT_OK(BlockParser::Make(options, &parser)); RETURN_NOT_OK(parser->Parse(src)); return parser->Finish(parsed); } static inline std::string PrettyPrint(string_view one_line) { rj::Document document; // Must pass size to avoid ASAN issues. document.Parse(one_line.data(), one_line.size()); rj::StringBuffer sb; rj::PrettyWriter writer(sb); document.Accept(writer); return sb.GetString(); } inline static std::string scalars_only_src() { return R"( { "hello": 3.5, "world": false, "yo": "thing" } { "hello": 3.25, "world": null } { "hello": 3.125, "world": null, "yo": "\u5fcd" } { "hello": 0.0, "world": true, "yo": null } )"; } inline static std::string nested_src() { return R"( { "hello": 3.5, "world": false, "yo": "thing", "arr": [1, 2, 3], "nuf": {} } { "hello": 3.25, "world": null, "arr": [2], "nuf": null } { "hello": 3.125, "world": null, "yo": "\u5fcd", "arr": [], "nuf": { "ps": 78 } } { "hello": 0.0, "world": true, "yo": null, "arr": null, "nuf": { "ps": 90 } } )"; } inline static std::string null_src() { return R"( { "plain": null, "list1": [], "list2": [], "struct": { "plain": null } } { "plain": null, "list1": [], "list2": [null], "struct": {} } )"; } } // namespace json } // namespace arrow