python (3.12.0)

(root)/
lib/
python3.12/
urllib/
__pycache__/
robotparser.cpython-312.opt-1.pyc

̑e$dZddlZddlZddlZdgZejddZGddZGddZ	Gd	d
Z
y)a% robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
NRobotFileParserRequestRatezrequests secondscZeZdZdZddZdZdZdZdZdZ	dZ
d	Zd
ZdZ
dZd
Zy)rzs This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    czg|_g|_d|_d|_d|_|j|d|_y)NFr)entriessitemaps
default_entrydisallow_all	allow_allset_urllast_checkedselfurls  F/BuggyBox/python/3.12.0/bootstrap/lib/python3.12/urllib/robotparser.py__init__zRobotFileParser.__init__s;
!!Sc|jS)zReturns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        )r
rs rmtimezRobotFileParser.mtime%s   rc6ddl}|j|_y)zYSets the time the robots.txt file was last fetched to the
        current time.

        rN)timer
)rrs  rmodifiedzRobotFileParser.modified.s
	 IIKrcp||_tjj|dd\|_|_y)z,Sets the URL referring to a robots.txt file.N)rurllibparseurlparsehostpathrs  rrzRobotFileParser.set_url6s-%||44S9!A>	49rc	tjj|j}|j	}|j|j
djy#tjj$rT}|jdvrd|_n4|jdk\r |jdkrd|_Yd}~yYd}~yYd}~yYd}~yd}~wwxYw)z4Reads the robots.txt URL and feeds it to the parser.zutf-8)iiTiiN)
rrequesturlopenrreadrdecode
splitlineserror	HTTPErrorcoder
r)rfrawerrs    rr%zRobotFileParser.read;s		9&&txx0A&&(CJJszz'*5578||%%	&xx:%$(!SSXX^!%&4"	&s)A**C;CCcd|jvr|j||_yy|jj|yN*)
useragentsr	rappend)rentrys  r
_add_entryzRobotFileParser._add_entryHs=%"""!!)%*"*
LL&rcd}t}|j|D]}|s4|dk(r
t}d}n"|dk(r|j|t}d}|jd}|dk\r|d|}|j	}|sh|jdd}t
|dk(s|dj	j|d<tjj|dj	|d<|ddk(rB|dk(r|j|t}|jj|dd}*|ddk(r3|dk7s9|jjt|dd	d}e|dd
k(r3|dk7st|jjt|ddd}|ddk(r?|dk7s|dj	jrt!|d|_d}|dd
k(r|dk7s|djd}t
|dk(rk|dj	jrJ|dj	jr)t%t!|dt!|d|_d}|ddk(s|j(j|d|dk(r|j|yy)zParse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        rr#N:z
user-agentdisallowFallowTzcrawl-delayzrequest-rate/sitemap)Entryrr4findstripsplitlenlowerrrunquoter1r2	rulelinesRuleLineisdigitintdelayrreq_rater)rlinesstater3lineinumberss       rrzRobotFileParser.parseQs

7	2DA:!GEEaZOOE*!GEE		#AAvBQx::<D::c1%D4yA~q'--///1Q ,,..tAw}}?Q7l*z. %$$++DG4E!W
*z..xQ/GH !!W'z..xQ/FG !!W
-z 7==?224*-d1g,EK !!W.z"&q'--"4LA-'!*2B2B2D2L2L2N '
 0 0 2 : : <-8WQZ#gVWj/-ZEN !!W	)
MM((a1o7	2pA:OOE"rcb|jry|jry|jsytjjtjj
|}tjjdd|j|j|j|jf}tjj|}|sd}|jD]&}|j|s|j|cS|j r|j j|Sy)z=using the parsed robots.txt decide if useragent can fetch urlFTr;)r
rr
rrrrC
urlunparser!paramsqueryfragmentquoter
applies_to	allowancer	)r	useragentr
parsed_urlr3s     r	can_fetchzRobotFileParser.can_fetchs>>
  \\**6<<+?+?+DE
ll%%r"Z__j..
0C0C'EFll  %C\\	,E	*s++	,%%//44rc|jsy|jD]!}|j|s|jcS|jr|jjSyN)rrrVrHr	rrXr3s   rcrawl_delayzRobotFileParser.crawl_delaysYzz|\\	#E	*{{"	#%%+++rc|jsy|jD]!}|j|s|jcS|jr|jjSyr\)rrrVrIr	r]s   rrequest_ratezRobotFileParser.request_ratesYzz|\\	&E	*~~%	&%%...rc4|jsy|jSr\)rrs r	site_mapszRobotFileParser.site_mapss}}}}rc|j}|j||jgz}djtt|S)Nz

)rr	joinmapstr)rrs  r__str__zRobotFileParser.__str__s@,,)!3!3 44G{{3sG,--rN)rP)__name__
__module____qualname____doc__rrrrr%r4rrZr^r`rbrgrrrrsE
!(?
9'G#R:
.rc"eZdZdZdZdZdZy)rEzoA rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path.c|dk(r|sd}tjjtjj|}tjj	||_||_y)NrPT)rrrQrrUr!rW)rr!rWs   rrzRuleLine.__init__sP2:iI||&&v||'<'<T'BCLL&&t,	"rcZ|jdk(xs|j|jSr/)r!
startswith)rfilenames  rrVzRuleLine.applies_tos%yyCA8#6#6tyy#AArcB|jrdnddz|jzS)NAllowDisallowz: )rWr!rs rrgzRuleLine.__str__s>>zTADIIMMrN)rhrirjrkrrVrgrlrrrErEs1#BNrrEc(eZdZdZdZdZdZdZy)r=z?An entry has one or more user-agents and zero or more rulelinesc<g|_g|_d|_d|_yr\)r1rDrHrIrs rrzEntry.__init__s

rcg}|jD]}|jd||j|jd|j|j7|j}|jd|jd|j
|j
tt|jdj|S)NzUser-agent: z
Crawl-delay: zRequest-rate: r;
)r1r2rHrIrequestssecondsextendrerfrDrd)rretagentrates    rrgz
Entry.__str__s__	/EJJeW-.	/::!JJtzzl34==$==DJJ

a~FG

3sDNN+,yy~rc|jddj}|jD]}|dk(ry|j}||vsyy)z2check if this entry applies to the specified agentr;rr0TF)r@rBr1)rrXr}s   rrVzEntry.applies_tosXOOC(+113	__	E|KKME	!
	rcd|jD]!}|j|s|jcSy)zZPreconditions:
        - our agent applies to this entry
        - filename is URL decodedT)rDrVrW)rrqrLs   rrWzEntry.allowance
s2NN	&Dx(~~%	&rN)rhrirjrkrrgrVrWrlrrr=r=sI
rr=)rkcollectionsurllib.parserurllib.request__all__
namedtuplerrrEr=rlrr<module>rsU

$k$$]4FG~.~.BNN$((r