import os
import timeit
from random import shuffle
from unittest import TestCase

from constants import REPLACE_WITH_PII_ENTITY_TYPE
from data_object import Document, RedactionConfig
from exceptions import InvalidConfigurationException
from processors import Redactor, Segmenter

this_module_path = os.path.dirname(__file__)


class ProcessorsTest(TestCase):
    def test_segmenter_basic_text(self):
        segmentor = Segmenter(50, overlap_tokens=3)
        original_text = "Barack Hussein Obama II is an American politician and attorney who served as the " \
                        "44th president of the United States from 2009 to 2017."
        segments = segmentor.segment(original_text)
        expected_segments = [
            "Barack Hussein Obama II is an American politician ",
            "an American politician and attorney who served as ",
            "who served as the 44th president of the United ",
            "of the United States from 2009 to 2017."]
        for expected_segment, actual_segment in zip(expected_segments, segments):
            assert expected_segment == actual_segment.text
        shuffle(segments)
        assert segmentor.de_segment(segments).text == original_text


    def test_segmenter_no_segmentation_needed(self):
        segmentor = Segmenter(5000, overlap_tokens=3)
        original_text = "Barack Hussein Obama II is an American politician and attorney who served as the " \
                        "44th president of the United States from 2009 to 2017."
        segments = segmentor.segment(original_text)
        assert len(segments) == 1
        assert segments[0].text == original_text
        assert segmentor.de_segment(segments).text == original_text

    def test_segmenter_max_chars_limit(self):
        segmentor = Segmenter(50, overlap_tokens=3, max_overlapping_chars=20)
        original_text = "BarackHusseinObamaIIisanAmerican politicianandattorneywhoservedasthe " \
                        "44th president of the United Statesfrom2009to2017."
        segments = segmentor.segment(original_text)
        expected_segments = [
            "BarackHusseinObamaIIisanAmerican ",
            "nObamaIIisanAmerican politician",
            "nAmerican politicianandattorneywhoservedasthe ",
            "torneywhoservedasthe 44th president of the United ",
            "of the United Statesfrom2009to2017.", ]
        for expected_segment, actual_segment in zip(expected_segments, segments):
            assert expected_segment == actual_segment.text
        shuffle(segments)
        assert segmentor.de_segment(segments).text == original_text

    def test_segmenter_unicode_chars(self):
        segmentor = Segmenter(100, overlap_tokens=3)
        original_text = "ʕ•́ᴥ•̀ʔっ♡ Emoticons 😜 ʕ•́ᴥ•̀ʔっ♡ Emoticons 😜 ᗷᙓ ò¥¥¥¥¥¥¥ᗢᖇᓮᘐᓰﬡᗩᒪ ℬ℮ ¢◎øł Bᴇ ʏᴏᴜʀsᴇʟғ विकिपीडिया सभी विषयों पर प्रामाणिक और उपयोग, " \
                        "परिवर्तन व पुनर्वितरण के लिए स्वतन्त्र ज्ञानकोश बनाने hànbǎobāo, hànbǎo 汉堡包/漢堡包, 汉堡/漢堡 – hamburger"
        segments = segmentor.segment(original_text)
        expected_segments = [
            "ʕ•́ᴥ•̀ʔっ♡ Emoticons 😜 ʕ•́ᴥ•̀ʔっ♡ Emoticons 😜 ᗷᙓ ",
            "Emoticons 😜 ᗷᙓ ò¥¥¥¥¥¥¥ᗢᖇᓮᘐᓰﬡᗩᒪ ℬ℮ ¢◎øł Bᴇ ",
            "ℬ℮ ¢◎øł Bᴇ ʏᴏᴜʀsᴇʟғ विकिपीडिया सभी ",
            "ʏᴏᴜʀsᴇʟғ विकिपीडिया सभी विषयों पर ",
            "सभी विषयों पर प्रामाणिक और उपयोग, ",
            "प्रामाणिक और उपयोग, परिवर्तन व ",
            "उपयोग, परिवर्तन व पुनर्वितरण के लिए ",
            "पुनर्वितरण के लिए स्वतन्त्र ",
            "के लिए स्वतन्त्र ज्ञानकोश बनाने hànbǎobāo, ",
            "ज्ञानकोश बनाने hànbǎobāo, hànbǎo 汉堡包/漢堡包, 汉堡/漢堡 ",
            "hànbǎo 汉堡包/漢堡包, 汉堡/漢堡 – hamburger"]
        assert len(expected_segments) == len(segments)
        for expected_segment, actual_segment in zip(expected_segments, segments):
            assert expected_segment == actual_segment.text
        assert segmentor.de_segment(segments).text == original_text

    def test_desegment_overlapping_results(self):
        segments = [
            Document(text="Some Random SSN Some Random email-id Some Random name and address and some credit card number", char_offset=0,
                     pii_classification={'SSN': 0.234, 'EMAIL': 0.765, 'NAME': 0.124, 'ADDRESS': 0.976},
                     pii_entities=[{'Score': 0.234, 'Type': 'SSN', 'BeginOffset': 12, 'EndOffset': 36},
                                   {'Score': 0.765, 'Type': 'EMAIL', 'BeginOffset': 28, 'EndOffset': 36},
                                   {'Score': 0.534, 'Type': 'NAME', 'BeginOffset': 49, 'EndOffset': 53},
                                   {'Score': 0.234, 'Type': 'ADDRESS', 'BeginOffset': 58, 'EndOffset': 65}]),
            Document(text="Some Random name and address and some credit card number", char_offset=37,
                     pii_classification={'SSN': 0.234, 'EMAIL': 0.765, 'USERNAME': 0.424, 'ADDRESS': 0.976},
                     pii_entities=[{'Score': 0.234, 'Type': 'USERNAME', 'BeginOffset': 12, 'EndOffset': 16},
                                   {'Score': 0.634, 'Type': 'ADDRESS', 'BeginOffset': 17, 'EndOffset': 28},
                                   {'Score': 0.234, 'Type': 'CREDIT_DEBIT_NUMBER', 'BeginOffset': 38, 'EndOffset': 56}])]
        segmentor = Segmenter(5000)
        expected_merged_document = Document(
            text="Some Random SSN Some Random email-id Some Random name and address and some credit card number", char_offset=37,
            pii_classification={'SSN': 0.234, 'EMAIL': 0.765, 'NAME': 0.124, 'USERNAME': 0.424, 'ADDRESS': 0.976},
            pii_entities=[{'Score': 0.234, 'Type': 'SSN', 'BeginOffset': 12, 'EndOffset': 36},
                          {'Score': 0.765, 'Type': 'EMAIL', 'BeginOffset': 28, 'EndOffset': 36},
                          {'Score': 0.534, 'Type': 'NAME', 'BeginOffset': 49, 'EndOffset': 53},
                          {'Score': 0.634, 'Type': 'ADDRESS', 'BeginOffset': 54, 'EndOffset': 65},
                          {'Score': 0.234, 'Type': 'CREDIT_DEBIT_NUMBER', 'BeginOffset': 75, 'EndOffset': 93}])
        actual_merged_doc = segmentor.de_segment(segments)
        assert expected_merged_document.text == actual_merged_doc.text
        assert expected_merged_document.pii_classification == actual_merged_doc.pii_classification
        assert expected_merged_document.pii_entities == actual_merged_doc.pii_entities


    def test_is_overlapping_annotations(self):
        segmentor = Segmenter(5000)
        assert segmentor._is_overlapping_annotations({'Score': 0.634, 'Type': 'ADDRESS', 'BeginOffset': 54, 'EndOffset': 65},
                                                     {'Score': 0.234, 'Type': 'ADDRESS', 'BeginOffset': 58, 'EndOffset': 65}) == 0

    def test_segmenter_scalablity_test(self):
        # 1MB of text should be segmented with around 30 ms latency
        setup = """
import os    
from processors import Segmenter

text=" Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
one_mb_text=" "
for i in range(7751):
    one_mb_text += text 
segmenter = Segmenter(overlap_tokens=20, max_doc_size=5000)
        """.format(this_module_path)
        segmentation_time = timeit.timeit("segmenter.segment(one_mb_text)", setup=setup, number=100)
        assert segmentation_time < 15

    def test_redaction_with_no_entities(self):
        text = "Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
        redactor = Redactor(RedactionConfig())
        redacted_text = redactor.redact(text, [])
        assert text == redacted_text

    def test_redaction_default_redaction_config(self):
        text = "Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
        redactor = Redactor(RedactionConfig())
        redacted_text = redactor.redact(text, [{'Score': 0.234, 'Type': 'NAME', 'BeginOffset': 6, 'EndOffset': 16},
                                               {'Score': 0.765, 'Type': 'CREDIT_DEBIT_NUMBER', 'BeginOffset': 77, 'EndOffset': 96}])
        expected_redaction = "Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account ******************* has a minimum payment of $24.53"
        assert expected_redaction == redacted_text

    def test_redaction_with_replace_entity_type(self):
        text = "Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
        redactor = Redactor(RedactionConfig(pii_entity_types=['NAME'], mask_mode=REPLACE_WITH_PII_ENTITY_TYPE, confidence_threshold=0.6))
        redacted_text = redactor.redact(text, [{'Score': 0.634, 'Type': 'NAME', 'BeginOffset': 6, 'EndOffset': 15},
                                               {'Score': 0.765, 'Type': 'CREDIT_DEBIT_NUMBER', 'BeginOffset': 77, 'EndOffset': 96}])
        expected_redaction = "Hello [NAME]. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
        assert expected_redaction == redacted_text

    def test_segmenter_constructor_invalid_args(self):
        try:
            Segmenter(3)
            assert False, "Expected an InvalidConfigurationException"
        except InvalidConfigurationException:
            return