from dataclasses import dataclass
from typing import Optional, List
import statistics
import math
import difflib
import re
from uuid import uuid4

from trp.trp2 import TPoint
from trp import FieldKey, FieldValue, Line, Word as trpWord

# Matches runs of characters that are NOT ASCII letters, ASCII digits or a space.
alphanum_regex_with_space = re.compile(r'[^a-zA-Z 0-9]+')
# Matches runs of characters that are NOT ASCII letters or ASCII digits
# (so spaces are part of the match as well).
alphanum_regex_without_space = re.compile(r'[^a-zA-Z0-9]+')
# Matches a single ASCII digit; used to detect words that contain a number.
number_regex = re.compile("[0-9]")


def make_alphanum_and_lower_for_non_numbers(word: str, with_space=True) -> str:
    """Normalize a word for comparison unless it contains a digit.

    Args:
        word: the text to normalize.
        with_space: when True, spaces are kept while other non-alphanumeric
            characters are removed; when False, spaces are removed as well.

    Returns:
        ``word`` exactly as passed in when it contains an ASCII digit;
        otherwise the text with non-alphanumeric characters removed,
        lower-cased, and stripped of leading/trailing whitespace.
    """
    # A digit anywhere means this is most likely a value, not an identifier,
    # so it is handed back untouched.
    if number_regex.search(word) is not None:
        return word
    pattern = alphanum_regex_with_space if with_space else alphanum_regex_without_space
    cleaned = pattern.sub('', str(word))
    return cleaned.lower().strip()


def get_diff_for_alphanum_words(word1: str, word2: str) -> float:
    """Return the similarity ratio of two words after normalizing each one.

    Both words are passed through ``make_alphanum_and_lower_for_non_numbers``
    (keeping spaces) and the results are compared with ``get_diff_for_words``.
    """
    normalized_first = make_alphanum_and_lower_for_non_numbers(word=word1)
    normalized_second = make_alphanum_and_lower_for_non_numbers(word=word2)
    return get_diff_for_words(word1=normalized_first, word2=normalized_second)


def get_diff_for_words(word1: str, word2: str) -> float:
    """Return the ``difflib.SequenceMatcher`` similarity ratio of two strings.

    The ratio is 1.0 for identical inputs and 0.0 when nothing matches.
    No junk heuristic function is supplied to the matcher.
    """
    matcher = difflib.SequenceMatcher(None, word1, word2)
    similarity = matcher.ratio()
    return similarity


@dataclass(repr=True)
class TWord:
    """A piece of recognized text together with its bounding box on a page.

    An instance is built from exactly one of three sources (see ``__init__``):
    explicit values, an ``ocrdb_row`` sequence, or a ``trp`` object.
    Equality and ordering are based solely on ``id``.
    """
    text: str                 # lower-cased text
    original_text: str        # text as originally provided (may be None)
    text_type: str            # e.g. word, line, phrase, KEY, VALUE
    confidence: float
    id: str
    xmin: int
    ymin: int
    xmax: int
    ymax: int
    page_number: int
    doc_width: int
    doc_height: int
    child_relationships: Optional[str] = None   # comma-separated child ids
    reference: Optional[str] = None
    resolver: Optional[str] = None

    def __init__(
            self,
            text: str = None,
            original_text: str = None,
            text_type: str = None,    # word, line or phrase
            confidence: float = None,
            id: str = None,
            xmin: int = None,
            ymin: int = None,
            xmax: int = None,
            ymax: int = None,
            page_number: int = None,
            ocrdb_row=None,
            trp_word: "trpWord" = None,
            doc_width: int = None,
            doc_height: int = None,
            child_relationships: str = "",
            reference: str = None,
            resolver: str = None):
        """Create a TWord from exactly one of ``text``, ``ocrdb_row`` or ``trp_word``.

        * ``text``: explicit values. ``text_type``, ``id``, ``confidence``,
          the four coordinates, ``page_number``, ``doc_width`` and
          ``doc_height`` are required. ``text`` and ``text_type`` are stored
          lower-cased.
        * ``ocrdb_row``: an indexable row; positions 1..14 are read as
          page_number, text_type, text, original_text, confidence, xmin, ymin,
          xmax, ymax, id, doc_width, doc_height, child_relationships,
          reference (position 0 is not used).
        * ``trp_word``: a trp ``Word``, ``Line``, ``FieldKey`` or
          ``FieldValue``. Requires ``doc_width``, ``doc_height`` and
          ``page_number``; the relative bounding box is scaled by the document
          size and rounded.

        resolver: textract, tquery, table, forms

        Raises:
            ValueError: if none or more than one source is given, or required
                companion values for the chosen source are missing.
        """
        sources_given = sum(1 for source in (text, ocrdb_row, trp_word) if source)
        if sources_given > 1:
            raise ValueError("Only can take one, text or trp_word or word_position or ocrdb_row.")
        if sources_given == 0:
            raise ValueError("Have to pass in one of text or trp_word or word_position.")

        if text:
            missing_values: List[str] = []
            if not text_type:
                missing_values.append("text_type")
            if not id:
                missing_values.append("id")
            # 0 / 0.0 is a legitimate confidence, only None means "not given".
            if confidence is None:
                missing_values.append("confidence")
            if xmin is None or ymin is None or xmax is None or ymax is None:
                missing_values.append("xmin ymin xmax or ymax")
            if not page_number:
                missing_values.append("page_number")
            if not doc_width or not doc_height:
                missing_values.append("doc_width or doc_height")
            if missing_values:
                raise ValueError(f"missing: {missing_values}")

            self.text = text.lower()
            # Always assign so the attribute exists (repr and phrase
            # combination read it) even when no original text was supplied.
            self.original_text = original_text
            self.text_type = text_type.lower()
            self.confidence = confidence
            self.id = id
            self.xmin = xmin
            self.ymin = ymin
            self.xmax = xmax
            self.ymax = ymax
            self.page_number = page_number
            self.doc_width = doc_width
            self.doc_height = doc_height
            self.child_relationships = child_relationships
            if reference:
                self.reference = reference
            if resolver:
                self.resolver = resolver
        elif ocrdb_row:
            self.page_number = ocrdb_row[1]
            self.text_type = ocrdb_row[2]
            self.text = ocrdb_row[3]
            self.original_text = ocrdb_row[4]
            self.confidence = ocrdb_row[5]
            self.xmin = ocrdb_row[6]
            self.ymin = ocrdb_row[7]
            self.xmax = ocrdb_row[8]
            self.ymax = ocrdb_row[9]
            self.id = ocrdb_row[10]
            self.doc_width = ocrdb_row[11]
            self.doc_height = ocrdb_row[12]
            self.child_relationships = ocrdb_row[13]
            self.reference = ocrdb_row[14]
        else:
            if not (doc_width and doc_height and page_number):
                raise ValueError(
                    "when using trp_word, need doc_width and doc_height and page_number parameters as well. "
                    f"doc_width: {doc_width}, doc_height: {doc_height}, page_number: {page_number}")

            if isinstance(trp_word, (FieldKey, FieldValue)):
                detected_type = 'KEY' if isinstance(trp_word, FieldKey) else 'VALUE'
                if reference:
                    self.reference = reference
            elif isinstance(trp_word, trpWord):
                detected_type = 'word'
            elif isinstance(trp_word, Line):
                # Lines get child_relationships below, so they need their
                # text populated as well.
                detected_type = 'line'
            else:
                # Unknown object kinds keep the previous behavior: only the
                # geometry/id attributes below are populated.
                detected_type = None
            if detected_type is not None:
                self.text = trp_word.text.lower()
                self.text_type = detected_type
                self.original_text = trp_word.text

            self.confidence = trp_word.confidence
            bounding_box = trp_word.geometry.boundingBox
            # Bounding box values are relative; scale by the document size.
            self.xmin = round(bounding_box.left * doc_width)
            self.ymin = round(bounding_box.top * doc_height)
            self.xmax = round(self.xmin + (bounding_box.width * doc_width))
            self.ymax = round(self.ymin + (bounding_box.height * doc_height))
            self.page_number = page_number
            if resolver:
                self.resolver = resolver
            self.id = trp_word.id
            self.doc_width = doc_width
            self.doc_height = doc_height
            if isinstance(trp_word, Line):
                self.child_relationships = ",".join([x.id for x in trp_word.words])
            else:
                self.child_relationships = ""

    def __eq__(self, o: object) -> bool:
        """Two TWords are equal when they share the same ``id``."""
        return isinstance(o, TWord) and self.id == o.id

    def __ne__(self, o: object) -> bool:
        """Inverse of ``__eq__``."""
        return not self.__eq__(o)

    def __gt__(self, o) -> bool:
        """Order by ``id``; comparing against a non-TWord yields False."""
        return isinstance(o, TWord) and self.id > o.id

    def __lt__(self, o) -> bool:
        """Order by ``id``; comparing against a non-TWord yields False."""
        return isinstance(o, TWord) and self.id < o.id

    @property
    def center(self) -> "TPoint":
        """Center point of the bounding box."""
        return TPoint(x=(self.xmin + self.xmax) / 2, y=(self.ymin + self.ymax) / 2)

    @property
    def height(self) -> float:
        """Absolute height of the bounding box."""
        return abs(self.ymax - self.ymin)

    @property
    def area(self):
        """Absolute area of the bounding box."""
        return abs(self.xmax - self.xmin) * abs(self.ymax - self.ymin)

    def euclid_distance(self, other_tword: "TWord") -> float:
        """Euclidean distance between the centers of this and ``other_tword``."""
        center1 = self.center
        center2 = other_tword.center
        return math.dist((center1.x, center1.y), (center2.x, center2.y))

    @staticmethod
    def combine_multiple_words_to_phrase(tword_list: "list[TWord]") -> "TWord":
        """Merge several TWords into one new TWord of type ``phrase``.

        The texts (and the non-empty original texts) are joined with a single
        space, the bounding box is the union of all boxes, and the confidence
        is the arithmetic mean. Page number and document size are taken from
        the first element; the new object gets a fresh UUID as ``id``.

        Raises:
            ValueError: if ``tword_list`` is empty.
        """
        if not tword_list:
            raise ValueError(f"tword_list is empty.")
        first = tword_list[0]
        phrase = " ".join(x.text for x in tword_list)
        original_text = " ".join(x.original_text for x in tword_list if x.original_text)
        return TWord(page_number=int(first.page_number),
                     original_text=original_text,
                     text_type='phrase',
                     text=phrase,
                     confidence=statistics.mean([x.confidence for x in tword_list]),
                     xmin=min(x.xmin for x in tword_list),
                     ymin=min(x.ymin for x in tword_list),
                     xmax=max(x.xmax for x in tword_list),
                     ymax=max(x.ymax for x in tword_list),
                     id=str(uuid4()),
                     doc_width=first.doc_width,
                     doc_height=first.doc_height)

    def get_tupel(self):
        """Return ``(text, confidence, xmin, ymin, xmax, ymax)``."""
        return (self.text, self.confidence, self.xmin, self.ymin, self.xmax, self.ymax)