a *NaB#@sz ddlZWney&ddlZYn0ddlZddlmZddlmZddlm Z ddl m Z ddl m Z mZmZmZmZddlmZdd lmZmZmZmZmZmZe ed eed d d Ze ed eed ddZe ed eeed ddZe ed eed ddZ eed ddZ!e ed eed ddZ"e ed eed ddZ#e ed eed ddZ$e ed eed ddZ%e ed eed ddZ&eed d d!Z'e ed eed d"d#Z(e ed eed d$d%Z)e ed eed d&d'Z*e ed eed d(d)Z+e ed eed d*d+Z,e e-ed eed,d-d.Z.dIe/e0eed0d1d2Z1e d3d eed4d5d6Z2e/eeee/fd7d8d9Z3eed:d;d<Z4dJeeed>d?d@Z5ee edAdBdCZ6eee7dDdEdFZ8eeedDdGdHZ9dS)KN)IncrementalDecoder)aliases) lru_cache)findall)ListOptionalSetTupleUnion)MultibyteIncrementalDecoder)ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize) characterreturncCsTzt|}Wnty"YdS0d|vpRd|vpRd|vpRd|vpRd|vpRd|vS)NFz WITH GRAVEz WITH ACUTEz WITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz WITH TILDE unicodedataname ValueErrorr descriptionrs/private/var/folders/js/6pj4vh5d4zd0k6bxv74qrbhr0000gr/T/pip-target-22xwyzbs/lib/python/charset_normalizer/utils.pyis_accentuateds rcCs.t|}|s|S|d}tt|ddS)N r)r decompositionsplitchrint)rZ decomposedcodesrrr remove_accent)s   r&cCs.t|}tD]\}}||vr|SqdS)zK Retrieve the Unicode range official name from a single character. N)ordritems)rZ character_ord range_nameZ ord_rangerrr unicode_range4s  r*cCs,zt|}Wnty"YdS0d|vS)NFZLATINrrrrris_latinBs  r+cCs(z|dWnty"YdS0dS)NasciiFT)encodeUnicodeEncodeErrorrrrris_asciiKs  r0cCs2t|}d|vrdSt|}|dur*dSd|vS)NPTFZ Punctuationrcategoryr*rcharacter_categorycharacter_rangerrris_punctuationSs r7cCs:t|}d|vsd|vrdSt|}|dur2dSd|vS)NSNTFZFormsr2r4rrr is_symbolbs r:cCst|}|durdSd|vS)NFZ Emoticons)r*)rr6rrr is_emoticonqsr;cCs&|s|dvrdSt|}d|vS)N)u|+,;<>TZ)isspacerr3rr5rrr is_separator{s rDcCs||kSN)islowerisupperr/rrris_case_variablesrHcCst|}d|kS)NCo)rr3rCrrris_private_use_onlys rJcCs,zt|}Wnty"YdS0d|vS)NFCJKrrZcharacter_namerrris_cjks  rMcCs,zt|}Wnty"YdS0d|vS)NFZHIRAGANArrLrrr is_hiraganas  rNcCs,zt|}Wnty"YdS0d|vS)NFZKATAKANArrLrrr is_katakanas  rOcCs,zt|}Wnty"YdS0d|vS)NFZHANGULrrLrrr is_hanguls  rPcCs,zt|}Wnty"YdS0d|vS)NFZTHAIrrLrrris_thais  rQ)r)rcCstD]}||vrdSqdS)NTF)r)r)keywordrrris_unicode_range_secondarysrS)sequence search_zonercCst|tstt|}tt|d||kr*|n|jddd}t|dkrNdS|D]N}|dd}t D]0\}}||kr|S||krn|SqnqRdS)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nr,ignoreerrorsr-_) isinstancebytes TypeErrorlenrrdecodelowerreplacerr()rUrVZseq_lenresultsZspecified_encodingencoding_alias encoding_ianarrrany_specified_encodings&   rf)rrcCs |dvpttd|jtS)zQ Verify is a specific encoding is a multi byte one based on it IANA name > Z utf_8_sigutf_7 utf_16_le utf_32_leutf_16 utf_16_beutf_32utf_8 utf_32_be encodings.{}) issubclass importlib import_moduleformatrr )rrrris_multi_byte_encodings  ru)rUrcCsJtD]@}t|}t|tr |g}|D]}||r$||fSq$qdS)z9 Identify and extract SIG/BOM in given sequence. )N)r r\r] startswith)rU iana_encodingZmarksmarkrrridentify_sig_or_boms  rz)rxrcCs|dvS)N>rkrmr)rxrrrshould_strip_sig_or_bomsr{T)cp_namestrictrcCsP|dd}tD] \}}||ks0||kr|Sq|rLtd||S)NrZr[z Unable to retrieve IANA for '{}')rarbrr(rrt)r|r}rdrerrr iana_names r~)decoded_sequencercCs4t}|D] }t|}|dur q ||q t|SrE)setr*addlist)rrangesrr6rrr range_scan"s r) iana_name_a iana_name_brc Cst|st|rdStd|j}td|j}|dd}|dd}d}tddD]*}t|g}||||krZ|d7}qZ|dS) NgrprWrXrr )rurrrsrtrranger]r`) rrZ decoder_aZ decoder_bZid_aZid_bZcharacter_match_countiZ to_be_decodedrrr cp_similarity0s    rcCs|tvo|t|vS)z Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r)rrrrr is_cp_similarEs r)rT)T):Z unicodedata2r ImportErrorrrcodecsrZencodings.aliasesr functoolsrrertypingrrrr r Z_multibytecodecr Zconstantr rrrrrstrboolrr&r*r+r0r7r:r;rDrHrJrMrNrOrPrQr_rSr]r$rfrurzr{r~rfloatrrrrrrsh