# SPDX-License-Identifier: Apache-2.0
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Any modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

#  Licensed to Elasticsearch B.V. under one or more contributor
#  license agreements. See the NOTICE file distributed with
#  this work for additional information regarding copyright
#  ownership. Elasticsearch B.V. licenses this file to you under
#  the Apache License, Version 2.0 (the "License"); you may
#  not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
# 	http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.

import os

import opensearchpy
import pandas as pd
from opensearchpy import OpenSearch

from opensearch_py_ml.common import os_version

# Absolute path of the directory that contains this module.
ROOT_DIR = os.path.abspath(os.path.dirname(__file__))

# Define test files and indices

# Connection settings for the cluster the tests talk to.  The first host is
# the URL used by the GitHub integration tests.
OPENSEARCH_HOST = "https://instance:9200"
OPENSEARCH_ADMIN_USER = "admin"
OPENSEARCH_ADMIN_PASSWORD = "admin"


def _new_test_client(host):
    """Return an ``OpenSearch`` client for *host* using the admin credentials.

    Certificate verification is switched off (``verify_certs=False``).
    """
    return OpenSearch(
        hosts=[host],
        http_auth=(OPENSEARCH_ADMIN_USER, OPENSEARCH_ADMIN_PASSWORD),
        verify_certs=False,
    )


# Client used by the tests.
OPENSEARCH_TEST_CLIENT = _new_test_client(OPENSEARCH_HOST)

# The GitHub integration tests reach the cluster at https://instance:9200,
# while a development machine usually serves it at https://localhost:9200.
# Instead of requiring this file to be edited when switching between the two,
# a connection error on the first host makes the module fall back to
# localhost.  A connection error on localhost is not handled and propagates.
try:
    OS_VERSION = os_version(OPENSEARCH_TEST_CLIENT)
except opensearchpy.exceptions.ConnectionError:
    OPENSEARCH_HOST = "https://localhost:9200"
    OPENSEARCH_TEST_CLIENT = _new_test_client(OPENSEARCH_HOST)
    OS_VERSION = os_version(OPENSEARCH_TEST_CLIENT)

FLIGHTS_INDEX_NAME = "flights"
# Field mapping for the flights data.  Every field except "timestamp" is
# declared with nothing but a type, so those entries are expanded from the
# (field name, type) pairs below; the pair order is the resulting key order.
# "timestamp" comes last and additionally pins a date format.
FLIGHTS_MAPPING = {
    "mappings": {
        "properties": {
            **{
                field_name: {"type": field_type}
                for field_name, field_type in (
                    ("AvgTicketPrice", "float"),
                    ("Cancelled", "boolean"),
                    ("Carrier", "keyword"),
                    ("Dest", "keyword"),
                    ("DestAirportID", "keyword"),
                    ("DestCityName", "keyword"),
                    ("DestCountry", "keyword"),
                    ("DestLocation", "geo_point"),
                    ("DestRegion", "keyword"),
                    ("DestWeather", "keyword"),
                    ("DistanceKilometers", "float"),
                    ("DistanceMiles", "float"),
                    ("FlightDelay", "boolean"),
                    ("FlightDelayMin", "integer"),
                    ("FlightDelayType", "keyword"),
                    ("FlightNum", "keyword"),
                    ("FlightTimeHour", "float"),
                    ("FlightTimeMin", "float"),
                    ("Origin", "keyword"),
                    ("OriginAirportID", "keyword"),
                    ("OriginCityName", "keyword"),
                    ("OriginCountry", "keyword"),
                    ("OriginLocation", "geo_point"),
                    ("OriginRegion", "keyword"),
                    ("OriginWeather", "keyword"),
                    ("dayOfWeek", "byte"),
                )
            },
            "timestamp": {
                "type": "date",
                "format": "strict_date_hour_minute_second",
            },
        }
    }
}
# Paths of the flights data files, located directly inside ROOT_DIR.
FLIGHTS_FILE_NAME = f"{ROOT_DIR}/flights.json.gz"
FLIGHTS_DF_FILE_NAME = f"{ROOT_DIR}/flights_df.json.gz"

# The "flights_small" index shares the FLIGHTS_MAPPING object itself
# (an alias, not a copy) and has its own data file.
FLIGHTS_SMALL_INDEX_NAME = "flights_small"
FLIGHTS_SMALL_MAPPING = FLIGHTS_MAPPING
FLIGHTS_SMALL_FILE_NAME = f"{ROOT_DIR}/flights_small.json.gz"

ECOMMERCE_INDEX_NAME = "ecommerce"
# Field mapping for the ecommerce data.
#
# Shapes that recur in this mapping:
#   * "text" fields carrying a "keyword" sub-field, some of which cap the
#     keyword with "ignore_above": 256;
#   * object fields ("geoip", "products") that declare "properties" instead
#     of a "type";
#   * "half_float" for the per-product price/amount fields, while the
#     order-level totals use "float".
ECOMMERCE_MAPPING = {
    "mappings": {
        "properties": {
            "category": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
            "currency": {"type": "keyword"},
            "customer_birth_date": {"type": "date"},
            # The three customer name fields are text with a keyword sub-field
            # limited by ignore_above=256.
            "customer_first_name": {
                "type": "text",
                "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
            },
            "customer_full_name": {
                "type": "text",
                "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
            },
            # Plain text: no keyword sub-field is declared for the gender.
            "customer_gender": {"type": "text"},
            "customer_id": {"type": "keyword"},
            "customer_last_name": {
                "type": "text",
                "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
            },
            "customer_phone": {"type": "keyword"},
            "day_of_week": {"type": "keyword"},
            "day_of_week_i": {"type": "integer"},
            "email": {"type": "keyword"},
            # Object field: keyword sub-fields plus a geo_point "location".
            "geoip": {
                "properties": {
                    "city_name": {"type": "keyword"},
                    "continent_name": {"type": "keyword"},
                    "country_iso_code": {"type": "keyword"},
                    "location": {"type": "geo_point"},
                    "region_name": {"type": "keyword"},
                }
            },
            "manufacturer": {
                "type": "text",
                "fields": {"keyword": {"type": "keyword"}},
            },
            "order_date": {"type": "date"},
            "order_id": {"type": "keyword"},
            # Object field holding the per-product sub-fields.
            "products": {
                "properties": {
                    "_id": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
                    },
                    "base_price": {"type": "half_float"},
                    "base_unit_price": {"type": "half_float"},
                    "category": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "created_on": {"type": "date"},
                    "discount_amount": {"type": "half_float"},
                    "discount_percentage": {"type": "half_float"},
                    "manufacturer": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                    },
                    "min_price": {"type": "half_float"},
                    "price": {"type": "half_float"},
                    "product_id": {"type": "long"},
                    # The only field in this mapping with an explicit analyzer.
                    "product_name": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword"}},
                        "analyzer": "english",
                    },
                    "quantity": {"type": "integer"},
                    "sku": {"type": "keyword"},
                    "tax_amount": {"type": "half_float"},
                    "taxful_price": {"type": "half_float"},
                    "taxless_price": {"type": "half_float"},
                    "unit_discount_amount": {"type": "half_float"},
                }
            },
            "sku": {"type": "keyword"},
            # Order-level totals are "float", unlike the half_float product prices.
            "taxful_total_price": {"type": "float"},
            "taxless_total_price": {"type": "float"},
            "total_quantity": {"type": "integer"},
            "total_unique_products": {"type": "integer"},
            "type": {"type": "keyword"},
            "user": {"type": "keyword"},
        }
    }
}
# Paths of the ecommerce data files, located directly inside ROOT_DIR.
ECOMMERCE_FILE_NAME = f"{ROOT_DIR}/ecommerce.json.gz"
ECOMMERCE_DF_FILE_NAME = f"{ROOT_DIR}/ecommerce_df.json.gz"

# Mapping that combines several field shapes: multi-fields, object fields
# nested several levels deep, a geo_point and a join field.
# TEST_MAPPING1_EXPECTED lists the dotted field paths and types that
# correspond to this structure.
TEST_MAPPING1 = {
    "mappings": {
        "properties": {
            # Multi-field: "city" is text, with the sub-field "raw" as keyword.
            "city": {"type": "text", "fields": {"raw": {"type": "keyword"}}},
            # Multi-field whose sub-field is text again, using the english analyzer.
            "text": {
                "type": "text",
                "fields": {"english": {"type": "text", "analyzer": "english"}},
            },
            # Object field: "lat" and "lon" are text with a keyword sub-field;
            # "lat" additionally carries an empty "index_prefixes" setting.
            "origin_location": {
                "properties": {
                    "lat": {
                        "type": "text",
                        "index_prefixes": {},
                        "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
                    },
                    "lon": {
                        "type": "text",
                        "fields": {"keyword": {"type": "keyword", "ignore_above": 256}},
                    },
                }
            },
            # Object fields nested several levels deep; the leaves are the
            # avg/max/min numeric fields.
            "maps-telemetry": {
                "properties": {
                    "attributesPerMap": {
                        "properties": {
                            "dataSourcesCount": {
                                "properties": {
                                    "avg": {"type": "long"},
                                    "max": {"type": "long"},
                                    "min": {"type": "long"},
                                }
                            },
                            # Note that "dynamic" is given as the string "true".
                            "emsVectorLayersCount": {
                                "dynamic": "true",
                                "properties": {
                                    "france_departments": {
                                        "properties": {
                                            "avg": {"type": "float"},
                                            "max": {"type": "long"},
                                            "min": {"type": "long"},
                                        }
                                    }
                                },
                            },
                        }
                    }
                }
            },
            "type": {"type": "keyword"},
            "name": {"type": "text"},
            "user_name": {"type": "keyword"},
            "email": {"type": "keyword"},
            "content": {"type": "text"},
            "tweeted_at": {"type": "date"},
            "dest_location": {"type": "geo_point"},
            # Join field: "question" relates to "answer" and "comment",
            # and "answer" relates to "vote".
            "my_join_field": {
                "type": "join",
                "relations": {"question": ["answer", "comment"], "answer": "vote"},
            },
        }
    }
}

TEST_MAPPING1_INDEX_NAME = "mapping1"

# Expected dotted field path -> mapping type pairs for TEST_MAPPING1.
# Sub-fields declared under "fields" (e.g. "city.raw") appear as their own
# entries next to the parent field.
TEST_MAPPING1_EXPECTED = {
    "city": "text",
    "city.raw": "keyword",
    "content": "text",
    "dest_location": "geo_point",
    "email": "keyword",
    "maps-telemetry.attributesPerMap.dataSourcesCount.avg": "long",
    "maps-telemetry.attributesPerMap.dataSourcesCount.max": "long",
    "maps-telemetry.attributesPerMap.dataSourcesCount.min": "long",
    "maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.avg": "float",
    "maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.max": "long",
    "maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.min": "long",
    "my_join_field": "join",
    "name": "text",
    "origin_location.lat": "text",
    "origin_location.lat.keyword": "keyword",
    "origin_location.lon": "text",
    "origin_location.lon.keyword": "keyword",
    "text": "text",
    "text.english": "text",
    "tweeted_at": "date",
    "type": "keyword",
    "user_name": "keyword",
}

# The same pairs as a frame: one row per field path (the index), with the
# mapping type in the single "os_dtype" column.
TEST_MAPPING1_EXPECTED_DF = pd.DataFrame(
    list(TEST_MAPPING1_EXPECTED.values()),
    index=list(TEST_MAPPING1_EXPECTED),
    columns=["os_dtype"],
)

# The frame above minus the four rows that are sub-fields declared under
# "fields" in TEST_MAPPING1.
TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF = TEST_MAPPING1_EXPECTED_DF.drop(
    labels=[
        "city.raw",
        "origin_location.lat.keyword",
        "origin_location.lon.keyword",
        "text.english",
    ],
    axis="index",
)

# Number of rows that remain after dropping those sub-fields.
TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT = (
    TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF.shape[0]
)

TEST_NESTED_USER_GROUP_INDEX_NAME = "nested_user_group"
# "group" is a keyword.  "user" is declared through "properties" only (no
# explicit "type"), and each of its sub-fields is a keyword.
TEST_NESTED_USER_GROUP_MAPPING = {
    "mappings": {
        "properties": {
            "group": {"type": "keyword"},
            "user": {
                "properties": {
                    sub_field: {"type": "keyword"}
                    for sub_field in ("first", "last", "address")
                }
            },
        }
    }
}

# Three documents addressed to the index named above.  Each entry pairs that
# index name ("_index") with one of the source bodies below ("_source").
# "user" is always a list of objects, and "address" - where present - is a
# list of strings.
TEST_NESTED_USER_GROUP_DOCS = [
    {"_index": TEST_NESTED_USER_GROUP_INDEX_NAME, "_source": source_body}
    for source_body in (
        {
            "group": "amsterdam",
            "user": [
                {
                    "first": "Manke",
                    "last": "Nelis",
                    "address": ["Elandsgracht", "Amsterdam"],
                },
                {
                    "first": "Johnny",
                    "last": "Jordaan",
                    "address": ["Elandsstraat", "Amsterdam"],
                },
            ],
        },
        {
            "group": "london",
            "user": [
                {"first": "Alice", "last": "Monkton"},
                {"first": "Jimmy", "last": "White", "address": ["London"]},
            ],
        },
        {"group": "new york", "user": [{"first": "Bill", "last": "Jones"}]},
    )
]

# Model archive: its file name, its path inside ROOT_DIR, and the URL of the
# same file in the ml-commons repository on GitHub.
ML_FILE_NAME = "all-MiniLM-L6-v2_torchscript_sentence-transformer.zip"
ML_FILE_PATH = f"{ROOT_DIR}/{ML_FILE_NAME}"
ML_FILE_URL = (
    "https://github.com/opensearch-project/ml-commons/raw/2.x/"
    "ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/"
    "text_embedding/all-MiniLM-L6-v2_torchscript_sentence-transformer.zip"
    "?raw=true"
)
# Path of the model configuration JSON file inside ROOT_DIR.
ML_CONFIG_FILE_PATH = f"{ROOT_DIR}/model_config.json"