"""
log_validation..py
====================================
This module validates the logs parsed by auditlogs_parsing.py
"""

import dateutil.parser
import datetime
import re


def is_valid_log(log, start_time, end_time):
    """If query doesn't contain problem statements, saves it."""
    problem_keywords = [
        "INTERNAL QUERY */",
        "DEALLOCATE pdo_stmt_",
        "UNLISTEN *",
        "context: SQL",
        "ERROR:",
        "CONTEXT:  SQL",
        "show ",
        "Undoing transaction",
        "Undo on",
        "pg_terminate_backend",
        "pg_cancel_backend",
        "volt_",
        "pg_temp_",
    ]

    potential_problem_keywords = ["BIND"]

    not_problem_keywords = ["BINDING"]

    if log.username == "rdsdb":
        return False

    if start_time and log.record_time < start_time:
        return False

    if end_time and log.record_time > end_time:
        return False

    if any(word in log.text for word in problem_keywords):
        return False

    if any(word in log.text for word in potential_problem_keywords) and not any(
        word in log.text for word in not_problem_keywords
    ):
        return False

    # filter internal statement rewrites with parameter markers
    if re.search("\$\d", log.text):
        # remove \$\d in string literals ( select '$1' ) or comment blocks ( */ $1 */ )
        text_without_valid_parameter_markers = re.sub(
            """'.*\\$\\d.*'|\\/\\*.*\\$\\d.*\\*\\/""", "", log.text, flags=re.DOTALL
        )
        # remove \$\d in single line quotes ( -- $1 )
        if "--" in log.text:
            text_without_valid_parameter_markers = re.sub(
                "^\s*--.*\$\d", "", text_without_valid_parameter_markers
            )
        # if there are still parameter markers remaining after removing them from valid cases, the query text is invalid
        if re.search("\$\d", text_without_valid_parameter_markers):
            return False

    return True


def is_duplicate(first_query_text, second_query_text):
    dedupe_these = [
        "set",
        "select",
        "create",
        "delete",
        "update",
        "insert",
        "copy",
        "unload",
        "with",
    ]

    first_query_text = first_query_text.strip()
    second_query_text = second_query_text.strip()
    first_query_text_no_semi = first_query_text.replace(";", "")
    second_query_tex_no_semi = second_query_text.replace(";", "")
    second_query_comment_removed = second_query_text
    first_query_comment_removed = first_query_text
    if second_query_text.startswith("/*"):
        second_query_comment_removed = second_query_text[
            second_query_text.find("*/") + 2 : len(second_query_text)
        ].strip()
    if first_query_text.startswith("/*"):
        first_query_comment_removed = first_query_text[
            second_query_text.find("*/") + 2 : len(first_query_text)
        ].strip()
    return (
        (
            first_query_text_no_semi == second_query_tex_no_semi
            and any(second_query_comment_removed.startswith(word) for word in dedupe_these)
        )
        or (
            (second_query_comment_removed.lower().startswith("create"))
            and (first_query_comment_removed.lower().startswith("create"))
            and second_query_comment_removed.endswith(";")
        )
        or (
            (second_query_comment_removed.lower().startswith("drop"))
            and (first_query_comment_removed.lower().startswith("drop"))
            and second_query_comment_removed.endswith(";")
        )
        or (
            (second_query_comment_removed.lower().startswith("alter"))
            and (first_query_comment_removed.lower().startswith("alter"))
            and second_query_comment_removed.endswith(";")
        )
    )


def get_logs_in_range(audit_objects, start_time, end_time):
    start_idx = None
    end_idx = None
    filenames = []
    for index, log in list(enumerate(audit_objects)):
        filename = log["Key"].split("/")[-1]
        file_datetime = dateutil.parser.parse(filename.split("_")[-1][:-3]).replace(
            tzinfo=datetime.timezone.utc
        )
        if start_time and file_datetime < start_time:
            continue

        if end_time and file_datetime > end_time:
            # make sure we've started
            if len(filenames) > 0:
                filenames.append(log["Key"])
            break

        # start with one before the first file to make sure we capture everything
        if len(filenames) == 0 and index > 0:
            filenames.append(audit_objects[index - 1]["Key"])

        filenames.append(log["Key"])

    return filenames


def remove_line_comments(query):
    removed_string = query
    prev_location = 0

    while True:
        line_comment_begin = removed_string.find("--", prev_location)
        prev_location = line_comment_begin

        # no more comments to find
        if line_comment_begin == -1:
            break

        # found_comment = True
        linebreak = removed_string.find("\n", line_comment_begin)
        start_comment = removed_string.find(
            "/*",
            line_comment_begin,
            linebreak if linebreak != -1 else len(removed_string),
        )
        end_comment = removed_string.find(
            "*/",
            line_comment_begin,
            linebreak if linebreak != -1 else len(removed_string),
        )

        if linebreak != -1:
            if start_comment == -1 and end_comment != -1:
                # if line comment is between start and end, then remove until end of comment
                removed_string = removed_string[:line_comment_begin] + removed_string[end_comment:]
            else:
                # else remove up the end of line
                removed_string = removed_string[:line_comment_begin] + removed_string[linebreak:]
        else:
            # reached end of query
            if start_comment == -1 and end_comment != -1:
                # if line comment is between start and end, then remove until end of comment

                removed_string = removed_string[:line_comment_begin] + removed_string[end_comment:]
            else:
                # else remove up the end of line
                removed_string = removed_string[:line_comment_begin]

    return removed_string