B _oaB#@sy ddlZWnek r(ddlZYnXddlZddlmZddlmZddlm Z ddl m Z ddl m Z mZmZmZmZddlmZdd lmZmZmZmZmZmZe ed eed d d Ze ed eed ddZe ed eeed ddZe ed eed ddZ eed ddZ!e ed eed ddZ"e ed eed ddZ#e ed eed ddZ$e ed eed ddZ%e ed eed ddZ&eed d d!Z'e ed eed d"d#Z(e ed eed d$d%Z)e ed eed d&d'Z*e ed eed d(d)Z+e ed eed d*d+Z,e e-ed eed,d-d.Z.dIe/e0eed0d1d2Z1e d3d eed4d5d6Z2e/eeee/fd7d8d9Z3eed:d;d<Z4dJeeed>d?d@Z5ee edAdBdCZ6eee7dDdEdFZ8eeedDdGdHZ9dS)KN)IncrementalDecoder)aliases) lru_cache)findall)ListOptionalSetTupleUnion)MultibyteIncrementalDecoder)ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize) characterreturncCsTyt|}Wntk r"dSXd|kpRd|kpRd|kpRd|kpRd|kpRd|kS)NFz WITH GRAVEz WITH ACUTEz WITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz WITH TILDE) unicodedataname ValueError)r descriptionr?/tmp/pip-target-avibdbtm/lib/python/charset_normalizer/utils.pyis_accentuatedsrcCs.t|}|s|S|d}tt|ddS)N r)r decompositionsplitchrint)rZ decomposedcodesrrr remove_accent)s   r$cCs.t|}x tD]\}}||kr|SqWdS)zK Retrieve the Unicode range official name from a single character. N)ordritems)rZ character_ord range_nameZ ord_rangerrr unicode_range4s r(cCs,yt|}Wntk r"dSXd|kS)NFZLATIN)rrr)rrrrris_latinBs r)cCs(y|dWntk r"dSXdS)NasciiFT)encodeUnicodeEncodeError)rrrris_asciiKs r-cCs2t|}d|krdSt|}|dkr*dSd|kS)NPTFZ Punctuation)rcategoryr()rcharacter_categorycharacter_rangerrris_punctuationSs r2cCs:t|}d|ksd|krdSt|}|dkr2dSd|kS)NSNTFZForms)rr/r()rr0r1rrr is_symbolbs r5cCst|}|dkrdSd|kS)NFZ Emoticons)r()rr1rrr is_emoticonqsr6cCs&|s|dkrdSt|}d|kS)N)u|+,;<>TZ)isspacerr/)rr0rrr is_separator{s r>cCs||kS)N)islowerisupper)rrrris_case_variablesrAcCst|}d|kS)NCo)rr/)rr0rrris_private_use_onlys rCcCs,yt|}Wntk r"dSXd|kS)NFCJK)rrr)rcharacter_namerrris_cjks rFcCs,yt|}Wntk r"dSXd|kS)NFZHIRAGANA)rrr)rrErrr is_hiraganas rGcCs,yt|}Wntk r"dSXd|kS)NFZKATAKANA)rrr)rrErrr is_katakanas rHcCs,yt|}Wntk r"dSXd|kS)NFZHANGUL)rrr)rrErrr is_hanguls rIcCs,yt|}Wntk r"dSXd|kS)NFZTHAI)rrr)rrErrris_thais rJ)r'rcCsxtD]}||krdSqWdS)NTF)r)r'keywordrrris_unicode_range_secondarys rL)sequence search_zonercCst|tstt|}tt|d||kr*|n|jddd}t|dkrNdSxJ|D]B}|dd}x,t D] \}}||kr|S||krr|SqrWqTWdS)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nr*ignore)errorsr-_) isinstancebytes TypeErrorlenrrdecodelowerreplacerr&)rNrOZseq_lenresultsZspecified_encodingencoding_alias encoding_ianarrrany_specified_encodings"     r^)rrcCs |dkpttd|jtS)zQ Verify is a specific encoding is a multi byte one based on it IANA name > utf_16 utf_16_beutf_7 utf_32_le utf_8_sig utf_16_leutf_8 utf_32_beutf_32z encodings.{}) issubclass importlib import_moduleformatrr )rrrris_multi_byte_encodings rm)rNrcCsJxDtD]<}t|}t|tr"|g}x|D]}||r(||fSq(WqWdS)z9 Identify and extract SIG/BOM in given sequence. )N)r rTrU startswith)rN iana_encodingZmarksmarkrrridentify_sig_or_boms    rr)rprcCs|dkS)N>r`rhr)rprrrshould_strip_sig_or_bomsrsT)cp_namestrictrcCsP|dd}x(tD]\}}||ks2||kr|SqW|rLtd||S)NrRrSz Unable to retrieve IANA for '{}')rYrZrr&rrl)rtrur\r]rrr iana_namesrv)decoded_sequencercCs8t}x(|D] }t|}|dkr"q ||q Wt|S)N)setr(addlist)rwrangesrr1rrr range_scan"s r|) iana_name_a iana_name_brc Cst|st|rdStd|j}td|j}|dd}|dd}d}x8tddD]*}t|g}||||kr\|d7}q\W|dS) Ngz encodings.{}rP)rQrr )rmrjrkrlrrangerUrX) r}r~Z decoder_aZ decoder_bZid_aZid_bZcharacter_match_countiZ to_be_decodedrrr cp_similarity0s    rcCs|tko|t|kS)z Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r)r}r~rrr is_cp_similarEsr)rM)T):Z unicodedata2r ImportErrorrjcodecsrZencodings.aliasesr functoolsrrertypingrrrr r Z_multibytecodecr Zconstantr rrrrrstrboolrr$r(r)r-r2r5r6r>rArCrFrGrHrIrJrWrLrUr"r^rmrrrsrvr|floatrrrrrrsh