from __future__ import absolute_import, division, unicode_literals
from future.builtins import str
from future.backports import urllib
from future.backports.urllib import parse as _parse, request as _request
urllib.parse = _parse
urllib.request = _request

__all__ = ["RobotFileParser"]


class RobotFileParser(object):
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the first default ("*") entry wins; it is consulted last
            if self.default_entry is None:
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        for line in lines:
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches; the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
                                       parsed_url.params, parsed_url.query,
                                       parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        return ''.join([str(entry) + "\n" for entry in self.entries])


class RuleLine(object):
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path


class Entry(object):
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True
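

# --- Usage sketch (added for illustration; not part of the original backport) ---
# Minimal example of driving the parser without touching the network: the
# robots.txt lines, the "ExampleBot" agent name and the example.com URLs are
# made-up values, and parse() is fed directly instead of calling read().
if __name__ == "__main__":
    rp = RobotFileParser()
    rp.parse([
        "User-agent: *",
        "Disallow: /private/",
    ])
    # The "*" record becomes the default entry, so any agent falls back to it.
    print(rp.can_fetch("ExampleBot", "http://example.com/private/secret.html"))  # False
    print(rp.can_fetch("ExampleBot", "http://example.com/index.html"))           # True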