U á€C^ªã@s„ddlmZddlmZddlmZmZmZmZm Z m Z m Z m Z m Z mZddlmZmZddlmZmZmZmZGdd„deƒZd S) é)Úunicode_literalsé)Ú Lemmatizer) ÚPOSÚNOUNÚVERBÚADJÚADVÚPRONÚDETÚAUXÚPUNCTÚADP)ÚSCONJÚCCONJ)Ú VerbForm_infÚ VerbForm_noneÚ Number_singÚ Degree_posc@s^eZdZdZddd„Zddd„Zddd„Zdd d „Zdd d „Zdd d„Z ddd„Z dd„Z dS)ÚFrenchLemmatizeraN French language lemmatizer applies the default rule based lemmatization procedure with some modifications for better French language support. The parts of speech 'ADV', 'PRON', 'DET', 'ADP' and 'AUX' are added to use the rule-based lemmatization. As a last resort, the lemmatizer checks in the lookup table. Nc Cs†|j di¡}d|jkr&| ||¡gS|tddfkr:d}nÖ|tddfkrNd}nÂ|tddfkrbd}n®|td d fkrvd }nš|td d fkrŠd }n†|td dfkržd}nr|t ddfkr²d}n^|t ddfkrÆd}nJ|t ddfkrÚd}n6|t ddfkrîd}n"|t ddfkrd}n | |¡gS| ||¡r0tt| ¡gƒƒS|j di¡}|j di¡}|j di¡}| || |i¡| |i¡| |g¡¡}|S)NÚ lemma_lookupZ lemma_rulesrÚnounrÚverbrÚadjrZadpr Zadvr ZauxrZcconjr Zdetr Zpronr ÚpunctrZsconjZ lemma_indexZ lemma_exc)ÚlookupsÚ get_tableÚgetrrrrr r rr r r rÚlookupÚ is_base_formÚlistÚsetÚlowerÚ lemmatize) ÚselfÚstringÚuniv_posÚ morphologyÚ lookup_tableZ index_tableZ exc_tableZ rules_tableZlemmas©r)ú;/tmp/pip-install-6_kvzl1k/spacy/spacy/lang/fr/lemmatizer.pyÚ__call__sL     üzFrenchLemmatizer.__call__cCsÞ|dkr in|}dd„|Dƒ}|dkr8| d¡dkr8dS|dkrR| d ¡d krRdS|dkrŒ| d ¡d krŒ| d ¡d krŒ| d¡dkrŒ|sŒdS|dkr¦| d¡dkr¦dSt|kr²dSt|kr¾dSt|krÊdSt|krÖdSdSdS)z{ Check whether we're dealing with an uninflected paradigm, so we can avoid lemmatization entirely. NcSs"g|]}|tddddfkr|‘qS)ÚNumberrÚVerbFormÚTense)r)Ú.0Úkeyr)r)r*Ú Dsþz1FrenchLemmatizer.is_base_form..rr,ZsingTrr-ÚinfÚfinr.ZpresrZDegreeÚposF)rrrrr)r$r&r'Zothersr)r)r*r>s:þ ÿ þ ýüzFrenchLemmatizer.is_base_formcCs ||d|ƒS)Nrr)©r$r%r'r)r)r*rcszFrenchLemmatizer.nouncCs ||d|ƒS)Nrr)r5r)r)r*rfszFrenchLemmatizer.verbcCs ||d|ƒS)Nrr)r5r)r)r*riszFrenchLemmatizer.adjcCs ||d|ƒS)Nrr)r5r)r)r*rlszFrenchLemmatizer.punctcCs.|j di¡}|dk r*||kr*||dS|S©Nrr)rr)r$r%Zorthr(r)r)r*ros zFrenchLemmatizer.lookupc Csô|j di¡}| ¡}g}||kr0| |¡|S| | |g¡¡g}|sª|D]Z\}} | |¡rN|dt|ƒt|ƒ…| } | s‚qN| |ks’|  ¡sž| | ¡qN| | ¡qN|s¸| |¡|sÚ||  ¡krÚ| ||d¡|sè| |¡t t |ƒƒSr6) rrr"ÚappendÚextendrÚendswithÚlenÚisalphaÚkeysr r!) r$r%ÚindexÚ exceptionsÚrulesr(ZformsZ oov_formsÚoldÚnewÚformr)r)r*r#us0       zFrenchLemmatizer.lemmatize)N)N)N)N)N)N)N) Ú__name__Ú __module__Ú __qualname__Ú__doc__r+rrrrrrr#r)r)r)r*r s * %     rN)Ú __future__rZ lemmatizerrÚsymbolsrrrrr r r r r rrrrrrrrr)r)r)r*Ús  0