
    F\h$                         S r SSKrSSKrSSKrSSKrS/r\R                  " SS5      r " S S5      r	 " S S5      r
 " S	 S
5      rg)a  robotparser.py

Copyright (C) 2000  Bastian Kleineidam

You can choose between two licenses when using this package:
1) GNU GPLv2
2) PSF license for Python 2.2

The robots.txt Exclusion Protocol is implemented as specified in
http://www.robotstxt.org/norobots-rfc.txt
    NRobotFileParserRequestRatezrequests secondsc                   d    \ rS rSrSrSS jrS rS rS rS r	S r
S	 rS
 rS rS rS rS rSrg)r      zjThis class provides a set of methods to read, parse and answer
questions about a single robots.txt file.

c                 z    / U l         / U l        S U l        SU l        SU l        U R                  U5        SU l        g )NFr   )entriessitemapsdefault_entrydisallow_all	allow_allset_urllast_checkedselfurls     )/usr/lib/python3.13/urllib/robotparser.py__init__RobotFileParser.__init__   s;    !!S    c                     U R                   $ )zReturns the time the robots.txt file was last fetched.

This is useful for long-running web spiders that need to
check for new robots.txt files periodically.

)r   r   s    r   mtimeRobotFileParser.mtime&   s        r   c                 6    SSK nUR                  5       U l        g)zISets the time the robots.txt file was last fetched to the
current time.

r   N)timer   )r   r   s     r   modifiedRobotFileParser.modified/   s    
 	 IIKr   c                 n    Xl         [        R                  R                  U5      SS u  U l        U l        g)z,Sets the URL referring to a robots.txt file.      N)r   urllibparseurlparsehostpathr   s     r   r   RobotFileParser.set_url7   s+    %||44S9!A>	49r   c                     [         R                  R                  U R                  5      nUR	                  5       nU R                  UR                  S5      R                  5       5        g! [         R                  R                   aY  nUR                  S;   a  SU l        O'UR                  S:  a  UR                  S:  a  SU l        UR                  5          SnAgSnAff = f)z4Reads the robots.txt URL and feeds it to the parser.zutf-8)i  i  Ti  i  N)r!   requesturlopenr   readr"   decode
splitlineserror	HTTPErrorcoder   r   close)r   frawerrs       r   r*   RobotFileParser.read<   s    
	9&&txx0A &&(CJJszz'*5578 ||%% 	xx:%$(!SSXX^!%IIKK	s   )A* *C!ACC!c                     SUR                   ;   a  U R                  c  Xl        g g U R                  R                  U5        g N*)
useragentsr
   r   append)r   entrys     r   
_add_entryRobotFileParser._add_entryJ   s;    %"""!!)%*" * LL&r   c                 B   Sn[        5       nU R                  5         U GH  nU(       d6  US:X  a  [        5       nSnO#US:X  a  U R                  U5        [        5       nSnUR                  S5      nUS:  a  USU nUR	                  5       nU(       d  Mv  UR                  SS5      n[        U5      S:X  d  M  US   R	                  5       R                  5       US'   [        R                  R                  US   R	                  5       5      US'   US   S:X  aD  US:X  a  U R                  U5        [        5       nUR                  R                  US   5        SnGM=  US   S:X  a6  US:w  a-  UR                  R                  [        US   S	5      5        SnGMy  GM|  US   S
:X  a6  US:w  a-  UR                  R                  [        US   S5      5        SnGM  GM  US   S:X  aG  US:w  a>  US   R	                  5       R                  5       (       a  [!        US   5      Ul        SnGM  GM  US   S:X  a  US:w  a  US   R                  S5      n[        U5      S:X  au  US   R	                  5       R                  5       (       aO  US   R	                  5       R                  5       (       a)  [%        [!        US   5      [!        US   5      5      Ul        SnGM  GM  US   S:X  d  GM  U R(                  R                  US   5        GM     US:X  a  U R                  U5        gg)z|Parse the input lines from a robots.txt file.

We allow that a user-agent: line is not preceded by
one or more blank lines.
r   r      #N:z
user-agentdisallowFallowTzcrawl-delayzrequest-rate/sitemap)Entryr   r;   findstripsplitlenlowerr!   r"   unquoter8   r9   	rulelinesRuleLineisdigitintdelayr   req_rater	   )r   linesstater:   lineinumberss          r   r"   RobotFileParser.parseS   s    DA:!GEEaZOOE*!GEE		#AAvBQx::<D::c1%D4yA~q'--///1Q ,,..tAw}}?Q7l*z. %$$++DG4E!W
*z..xQ/GH ! " !W'z..xQ/FG ! " !W-z  7==?2244*-d1g,EK ! " !W.z"&q'--"4LA-'!*2B2B2D2L2L2N2N '
 0 0 2 : : < <-8WQZ#gVWj/-ZEN ! " !W	)
 MM((a1o p A:OOE" r   c                    U R                   (       a  gU R                  (       a  gU R                  (       d  g[        R                  R                  [        R                  R                  U5      5      n[        R                  R                  SSUR                  UR                  UR                  UR                  45      n[        R                  R                  U5      nU(       d  SnU R                   H,  nUR                  U5      (       d  M  UR                  U5      s  $    U R                   (       a  U R                   R                  U5      $ g)z=using the parsed robots.txt decide if useragent can fetch urlFT rC   )r   r   r   r!   r"   r#   rK   
urlunparser%   paramsqueryfragmentquoter   
applies_to	allowancer
   )r   	useragentr   
parsed_urlr:   s        r   	can_fetchRobotFileParser.can_fetch   s    >>
    \\**6<<+?+?+DE
ll%%r"Z__j..
0C0C'E Fll  %C\\E	**s++ " %%//44r   c                     U R                  5       (       d  g U R                   H'  nUR                  U5      (       d  M  UR                  s  $    U R                  (       a  U R                  R                  $ g N)r   r   r_   rP   r
   r   ra   r:   s      r   crawl_delayRobotFileParser.crawl_delay   sY    zz||\\E	**{{" " %%+++r   c                     U R                  5       (       d  g U R                   H'  nUR                  U5      (       d  M  UR                  s  $    U R                  (       a  U R                  R                  $ g rf   )r   r   r_   rQ   r
   rg   s      r   request_rateRobotFileParser.request_rate   sY    zz||\\E	**~~% " %%...r   c                 >    U R                   (       d  g U R                   $ rf   )r	   r   s    r   	site_mapsRobotFileParser.site_maps   s    }}}}r   c                     U R                   nU R                  b  XR                  /-   nSR                  [        [        U5      5      $ )Nz

)r   r
   joinmapstr)r   r   s     r   __str__RobotFileParser.__str__   s>    ,,)!3!3 44G{{3sG,--r   )	r   r
   r   r   r$   r   r%   r	   r   N)rY   )__name__
__module____qualname____firstlineno____doc__r   r   r   r   r*   r;   r"   rc   rh   rk   rn   rt   __static_attributes__ r   r   r   r      sE    
!(?
9'G#R:
.r   c                   *    \ rS rSrSrS rS rS rSrg)rM      zhA rule line is a single "Allow:" (allowance==True) or "Disallow:"
(allowance==False) followed by a path.c                     US:X  a	  U(       d  Sn[         R                  R                  [         R                  R                  U5      5      n[         R                  R	                  U5      U l        X l        g )NrY   T)r!   r"   rZ   r#   r^   r%   r`   )r   r%   r`   s      r   r   RuleLine.__init__   sN    2:iI||&&v||'<'<T'BCLL&&t,	"r   c                 d    U R                   S:H  =(       d    UR                  U R                   5      $ r6   )r%   
startswith)r   filenames     r   r_   RuleLine.applies_to   s%    yyCA8#6#6tyy#AAr   c                 L    U R                   (       a  SOSS-   U R                  -   $ )NAllowDisallowz: r`   r%   r   s    r   rt   RuleLine.__str__   s    >>zTADIIMMr   r   N)	rv   rw   rx   ry   rz   r   r_   rt   r{   r|   r   r   rM   rM      s    1#BNr   rM   c                   0    \ rS rSrSrS rS rS rS rSr	g)	rE      z?An entry has one or more user-agents and zero or more rulelinesc                 <    / U l         / U l        S U l        S U l        g rf   )r8   rL   rP   rQ   r   s    r   r   Entry.__init__   s    
r   c                    / nU R                    H  nUR                  SU 35        M     U R                  b  UR                  SU R                   35        U R                  b7  U R                  nUR                  SUR                   SUR
                   35        UR                  [        [        U R                  5      5        SR                  U5      $ )NzUser-agent: zCrawl-delay: zRequest-rate: rC   
)r8   r9   rP   rQ   requestssecondsextendrr   rs   rL   rq   )r   retagentrates       r   rt   Entry.__str__   s    __EJJeW-. %::!JJtzzl34==$==DJJa~FG

3sDNN+,yy~r   c                     UR                  S5      S   R                  5       nU R                   H"  nUS:X  a    gUR                  5       nX!;   d  M"    g   g)z2check if this entry applies to the specified agentrC   r   r7   TF)rH   rJ   r8   )r   ra   r   s      r   r_   Entry.applies_to   sQ     OOC(+113	__E|KKME! % r   c                 r    U R                    H'  nUR                  U5      (       d  M  UR                  s  $    g)zJPreconditions:
- our agent applies to this entry
- filename is URL decodedT)rL   r_   r`   )r   r   rT   s      r   r`   Entry.allowance  s0     NNDx((~~% # r   )rP   rQ   rL   r8   N)
rv   rw   rx   ry   rz   r   rt   r_   r`   r{   r|   r   r   rE   rE      s    I
r   rE   )rz   collectionsurllib.errorr!   urllib.parseurllib.request__all__
namedtupler   r   rM   rE   r|   r   r   <module>r      sV   
    
$$]4FG. .DN N$( (r   