#!/usr/bin/env python
######################################################################################################################
#  Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.                                                #
#                                                                                                                    #
#  Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance    #
#  with the License. A copy of the License is located at                                                             #
#                                                                                                                    #
#      http://www.apache.org/licenses/LICENSE-2.0                                                                    #
#                                                                                                                    #
#  or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES #
#  OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions    #
#  and limitations under the License.                                                                                #
######################################################################################################################

import json
import os
import re

import newscatcher
from shared_util import custom_logging

logger = custom_logging.get_logger(__name__)


def validate_2_char_iso_code(str_iso_code):
    """
    Function to validate that the ISO code specified is exactly 2 alphabetic
    characters. The current newscatcher library only supports 2 character ISO
    codes. This is to reduce any SQL injection scenarios.

    :param str_iso_code: candidate country or language code
    :return: the input string, unchanged, when it consists of exactly two ASCII letters
    :raises TypeError: when the value is not a string of exactly two ASCII letters
    """
    # fullmatch anchors both ends, so length and character class are checked in a
    # single step. A 2 character value that is not alphabetic (e.g. "12" or "a;")
    # produces no match and is rejected below, instead of failing with an
    # AttributeError on a None match object. The character class already lists
    # both cases, so no IGNORECASE flag is needed.
    if isinstance(str_iso_code, str) and re.fullmatch(r"[a-zA-Z]{2}", str_iso_code):
        return str_iso_code

    logger.error(f"Not a 2 character ISO code {str_iso_code}")
    raise TypeError(f"Not a 2 character ISO code {str_iso_code}")


def validate_topic(str_topic):
    """
    Function to validate a topic name against the fixed list of topics this module
    accepts. The comparison is done on the lower-cased value.

    :param str_topic: topic name in any letter case; a falsy value (None, empty
        string) is not validated
    :return: the lower-cased topic when it is in the accepted list, None when the
        input is falsy
    :raises TypeError: when a non-empty topic is not in the accepted list
    """
    # Nothing to validate; the caller treats a missing topic as "no topic filter".
    if not str_topic:
        return None

    supported_topics = (
        "tech",
        "news",
        "business",
        "science",
        "finance",
        "food",
        "politics",
        "economics",
        "travel",
        "entertainment",
        "music",
        "sport",
        "world",
    )

    normalized = str_topic.lower()
    if normalized not in supported_topics:
        # The messages report the topic exactly as it was supplied.
        logger.error(f"Topic {str_topic} for newscatcher is not valid")
        raise TypeError(f"Topic {str_topic} for newscatcher is not valid")

    return normalized


def retrieve_urls_using_json(param_str):
    """
    This function calls the newscatcher api and returns the urls to be invoked. The
    input parameter is a JSON string which is converted to a dictionary and then processed.

    The keys read from the JSON object are "country", "language" and "topic"; all are
    optional. "topic" is a comma separated string of topic names. Whitespace around
    each name and empty entries (e.g. from a trailing comma) are ignored.

    :param param_str: JSON document (object) with the query parameters
    :return: when specific topics are requested, a list with the urls of every topic
        concatenated in the order the topics were given; otherwise whatever
        retrieve_urls returns for the country/language without a topic filter
    :raises TypeError: propagated from retrieve_urls when a parameter fails validation
    """
    logger.info(f"Parameters to retrieve list of urls to query: {param_str}")

    param_dict = json.loads(param_str)
    country = param_dict.get("country")
    language = param_dict.get("language")
    topics = param_dict.get("topic")

    # Normalize the topic string: "tech, news" must not fail validation because of
    # the space, and "tech," must not produce an empty topic, which would skip
    # validation and trigger an extra lookup with an empty topic filter.
    topic_list = []
    if topics:
        topic_list = [name.strip() for name in topics.split(",")]
        topic_list = [name for name in topic_list if name]

    # No usable topic, or the "ALL" wildcard anywhere in the list: single unfiltered query.
    if not topic_list or "ALL" in topic_list:
        return retrieve_urls(country=country, language=language)

    url_list = []
    for topic in topic_list:
        topic_urls = retrieve_urls(
            country=country,
            language=language,
            topic=topic,
        )

        # A lookup may come back empty/None; only accumulate actual results.
        if topic_urls:
            url_list.extend(topic_urls)

    return url_list


def retrieve_urls(country=None, language=None, topic=None):
    """
    This function validates the filters, calls the newscatcher api and returns the
    urls to be invoked.

    For each filter, a falsy value is passed through untouched, the literal "ALL" is
    translated to None (no filter), and any other value is validated before use.

    :param country: 2 character ISO country code, "ALL", or None
    :param language: 2 character ISO language code, "ALL", or None
    :param topic: topic name, "ALL", or None
    :return: the result of newscatcher.urls for the validated filters
    :raises TypeError: re-raised after logging when validation or the lookup raises it
    """
    try:
        # Filters are checked in a fixed order: country, language, then topic.
        if country:
            if country == "ALL":
                country = None
            else:
                country = validate_2_char_iso_code(country)

        if language:
            if language == "ALL":
                language = None
            else:
                language = validate_2_char_iso_code(language)

        if topic:
            if topic == "ALL":
                topic = None
            else:
                topic = validate_topic(topic)

        logger.info(f"Parameters to retrieve list are - country:{country}, language:{language}, topic:{topic}")
        found_urls = newscatcher.urls(country=country, language=language, topic=topic)
        logger.debug(f"retrieved url list: {found_urls}")
    except TypeError:
        # Record the failure here, then let the caller decide how to handle it.
        logger.error("Fetching urls threw an Error")
        raise
    else:
        return found_urls