
    F\hE                     V   S r SSKrSSKrSSKJr  SSKJr  S/r\R                  " S5      r
\R                  " S5      r\R                  " S5      r\R                  " S	5      r\R                  " S
5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S\R(                  5      r\R                  " S5      r\R                  " S5      rS rS r " S S\R4                  5      rg)zA parser for HTML and XHTML.    N)unescape)html5
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z6&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?z	<[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                     U R                  S5      nUR                  S5      (       a  [        U5      $ UR                  S5      (       d  USS  [        ;   a  [        U5      $ U$ )Nr   &#=   )group
startswithr   endswithhtml5_entities)matchrefs     "/usr/lib/python3.13/html/parser.py_replace_attr_charrefr   @   sU    
++a.C
~~d} <<QRN!:}J    c                 6    [         R                  [        U 5      $ N)attr_charrefsubr   )ss    r   _unescape_attrvaluer   L   s    1155r   c                      ^  \ rS rSrSrSrSS.U 4S jjrU 4S jrS rS	 r	S
r
S rS rS rS rS rS S jrS rS rS rS rS rS rS rS rS rS rS rS rS rS rSrU =r $ )!r   P   a  Find tags and other markup and call handler functions.

Usage:
    p = HTMLParser()
    p.feed(data)
    ...
    p.close()

Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag().  The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks).  If convert_charrefs is
True the character references are converted automatically to the
corresponding Unicode character (and self.handle_data() is no
longer split in chunks), otherwise they are passed by calling
self.handle_entityref() or self.handle_charref() with the string
containing respectively the named or numeric reference as the
argument.
)scriptstyleT)convert_charrefsc                N   > [         TU ]  5         Xl        U R                  5         g)zInitialize and reset this instance.

If convert_charrefs is True (the default), all character references
are automatically converted to the corresponding Unicode characters.
N)super__init__r   reset)selfr   	__class__s     r   r!   HTMLParser.__init__h   s     	 0

r   c                 b   > SU l         SU l        [        U l        SU l        [
        TU ]  5         g)z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elemr    r"   )r#   r$   s    r   r"   HTMLParser.resetr   s)    -r   c                 N    U R                   U-   U l         U R                  S5        g)zyFeed data to the parser.

Call this as often as you want, with as little or as much text
as you want (may include '\n').
r   N)r(   goaheadr#   datas     r   feedHTMLParser.feedz   s     ||d*Qr   c                 &    U R                  S5        g)zHandle any buffered data.r
   N)r/   r#   s    r   closeHTMLParser.close   s    Qr   Nc                     U R                   $ )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr5   s    r   get_starttag_textHTMLParser.get_starttag_text   s    ###r   c                     UR                  5       U l        [        R                  " SU R                  -  [        R                  5      U l        g )Nz</\s*%s\s*>)lowerr,   recompileIr+   )r#   elems     r   set_cdata_modeHTMLParser.set_cdata_mode   s/    **,::nt&FMr   c                 (    [         U l        S U l        g r   )r*   r+   r,   r5   s    r   clear_cdata_modeHTMLParser.clear_cdata_mode   s    -r   c                 
   U R                   nSn[        U5      nX4:  Ga  U R                  (       a|  U R                  (       dk  UR	                  SU5      nUS:  aR  UR                  S[        X4S-
  5      5      nUS:  a,  [        R                  " S5      R                  X&5      (       d  GO?UnOHU R                  R                  X#5      nU(       a  UR                  5       nOU R                  (       a  GOUnX5:  aR  U R                  (       a.  U R                  (       d  U R                  [        X#U 5      5        OU R                  X#U 5        U R                  X55      nX4:X  a  GOUR                  nU" SU5      (       Ga  [         R#                  X#5      (       a  U R%                  U5      n	OU" SU5      (       a  U R'                  U5      n	OU" SU5      (       a  U R)                  U5      n	ObU" SU5      (       a  U R+                  U5      n	OBU" S	U5      (       a  U R-                  U5      n	O"US
-   U:  a  U R                  S5        US
-   n	OGOU	S:  a  U(       d  GOUR	                  SUS
-   5      n	U	S:  a!  UR	                  SUS
-   5      n	U	S:  a  US
-   n	OU	S
-  n	U R                  (       a.  U R                  (       d  U R                  [        X#U	 5      5        OU R                  X#U	 5        U R                  X95      nGOU" SU5      (       a  [.        R#                  X#5      nU(       a^  UR1                  5       SS n
U R3                  U
5        UR5                  5       n	U" SU	S
-
  5      (       d  U	S
-
  n	U R                  X95      nGMp  SX#S  ;   a*  U R                  X#US-    5        U R                  X3S-   5      nGO+U" SU5      (       Ga  [6        R#                  X#5      nU(       a\  UR1                  S
5      n
U R9                  U
5        UR5                  5       n	U" SU	S
-
  5      (       d  U	S
-
  n	U R                  X95      nGM+  [:        R#                  X#5      nU(       aI  U(       aA  UR1                  5       X#S  :X  a+  UR5                  5       n	X::  a  Un	U R                  X3S
-   5      nO?US
-   U:  a&  U R                  S5        U R                  X3S
-   5      nOO S5       eX4:  a  GM  U(       ah  X4:  ac  U R                  (       a.  U R                  (       d  U R                  [        X#U 5      5        OU R                  X#U 5        U R                  X45      nX#S  U l         g )Nr   <&"   z[\s;]</<!--<?<!r
   r   r      ;zinteresting.search() lied)r(   lenr   r,   findrfindmaxr>   r?   searchr+   starthandle_datar   	updateposr   starttagopenr   parse_starttagparse_endtagparse_commentparse_piparse_html_declarationcharrefr   handle_charrefend	entityrefhandle_entityref
incomplete)r#   rb   r(   injampposr   r   knames              r   r/   HTMLParser.goahead   s   ,,Le$$T__LLa(q5 %]]3At=F!JJx077HHA((//;AAu(($$Xgl%;<$$Wq\2q$Avu ++J#q!!%%g11++A.Aa(())!,A****1-Aa((a(Aa((33A6A!eq[$$S)AAq5S!a%0A1u#LLa!e4q5 !AAQ,,T__(('A,)?@((16NN1(D!$$g1 ;;=2.D''-		A%c1Q3//Eq,Agbk)((1Q38 NN1c2C##!3 ;;q>D))$/		A%c1Q3//Eq,A"((4u{{};!IIK6 !A NN1!e4!eq[ $$S)qa%0A555qS eV 15$$T__  'A,!78  1.q$Ar{r   c                 p   U R                   nX!US-    S:X  d   S5       eX!US-    S:X  a  U R                  U5      $ X!US-    S:X  a  U R                  U5      $ X!US-    R                  5       S:X  a7  UR	                  S	US-   5      nUS
:X  a  g
U R                  X!S-   U 5        US-   $ U R                  U5      $ )NrO   rN   z+unexpected call to parse_html_declaration()   rL   	   z	<![CDATA[z	<!doctyper   rP   r
   )r(   r]   parse_marked_sectionr=   rS   handle_declparse_bogus_comment)r#   rf   r(   gtposs       r   r_   !HTMLParser.parse_html_declaration  s    ,,1~% 	D )C 	D%QqS>V#%%a((qs^{*,,Q//qs^!!#{2LLac*E{WqS/07N++A..r   c                     U R                   nX1US-    S;   d   S5       eUR                  SUS-   5      nUS:X  a  gU(       a  U R                  X1S-   U 5        US-   $ )NrO   )rN   rK   z(unexpected call to parse_bogus_comment()r   rP   r
   )r(   rS   handle_comment)r#   rf   reportr(   poss        r   rr   HTMLParser.parse_bogus_comment'  sq    ,,1~- 	I 1H 	I-ll3!$"9!C 01Qwr   c                     U R                   nX!US-    S:X  d   S5       e[        R                  X!S-   5      nU(       d  gUR                  5       nU R	                  X!S-   U 5        UR                  5       nU$ )NrO   rM   zunexpected call to parse_pi()rP   )r(   picloserV   rW   	handle_pirb   )r#   rf   r(   r   rh   s        r   r^   HTMLParser.parse_pi3  sn    ,,1~%F'FF%w!,KKMwsA'IIKr   c                    S U l         U R                  U5      nUS:  a  U$ U R                  nX1U U l         / n[        R	                  X1S-   5      nU(       d   S5       eUR                  5       nUR                  S5      R                  5       =U l        nXb:  a  [        R	                  X65      nU(       d  OUR                  SSS5      u  pnU
(       d  S nO0US S Ss=:X  a	  USS  :X  d  O  US S Ss=:X  a	  USS  :X  a  O  OUSS nU(       a  [        U5      nUR                  U	R                  5       U45        UR                  5       nXb:  a  M  X6U R                  5       nUS	;  a  U R                  X1U 5        U$ UR                  S
5      (       a  U R                  Xt5        U$ U R!                  Xt5        XpR"                  ;   a  U R%                  U5        U$ )Nr   r
   z#unexpected call to parse_starttag()rO      'rP   ")r   />r   )r9   check_for_whole_start_tagr(   tagfind_tolerantr   rb   r   r=   r)   attrfind_tolerantr   appendstriprX   r   handle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSrB   )r#   rf   endposr(   attrsr   rj   tagmattrnamerest	attrvaluerb   s                r   r[   HTMLParser.parse_starttag?  s   #//2A:M,,&0  &&w!4;;;uIIK"[[^1133sj!''3A()1a(8%HI 	2A$8)BC.82A#7237%aO	/	:	LL(..*I67A j %%'k!Wv./M<<##C/
    ,111##C(r   c                 l   U R                   n[        R                  X!5      nU(       a  UR                  5       nX$US-    nUS:X  a  US-   $ US:X  a@  UR	                  SU5      (       a  US-   $ UR	                  SU5      (       a  gXA:  a  U$ US-   $ US:X  a  gUS;   a  gXA:  a  U$ US-   $ [        S	5      e)
Nr
   r   /r   rO   rP   r'   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r(   locatestarttagend_tolerantr   rb   r   AssertionError)r#   rf   r(   r   rh   nexts         r   r   $HTMLParser.check_for_whole_start_tagk  s    ,,&,,W8AQqS>Ds{1us{%%dA..q5L%%c1--5Hq5Lrz 5 6 u1u677r   c                 *   U R                   nX!US-    S:X  d   S5       e[        R                  X!S-   5      nU(       d  gUR                  5       n[        R                  X!5      nU(       d  U R                  b  U R                  X!U 5        U$ [        R                  X!S-   5      nU(       d!  X!US-    S:X  a  US-   $ U R                  U5      $ UR                  S5      R                  5       nUR                  SUR                  5       5      nU R                  U5        US-   $ UR                  S5      R                  5       nU R                  b$  XpR                  :w  a  U R                  X!U 5        U$ U R                  U5        U R                  5         U$ )	NrO   rK   zunexpected call to parse_endtagr
   rP   r   z</>r   )r(   	endendtagrV   rb   
endtagfindr   r,   rX   r   rr   r   r=   rS   handle_endtagrE   )r#   rf   r(   r   rs   	namematchtagnamerA   s           r   r\   HTMLParser.parse_endtag  si   ,,1~%H'HH%  A#.		  ,*  5!12(..w!<IQqS>U*Q3J33A66ooa(..0G
 LLimmo6Ew'7N{{1~##%??&&  5!124 r   c                 H    U R                  X5        U R                  U5        g r   )r   r   r#   r   r   s      r   r   HTMLParser.handle_startendtag  s    S(3r   c                     g r    r   s      r   r   HTMLParser.handle_starttag      r   c                     g r   r   )r#   r   s     r   r   HTMLParser.handle_endtag  r   r   c                     g r   r   r#   rk   s     r   ra   HTMLParser.handle_charref  r   r   c                     g r   r   r   s     r   rd   HTMLParser.handle_entityref  r   r   c                     g r   r   r0   s     r   rX   HTMLParser.handle_data  r   r   c                     g r   r   r0   s     r   rv   HTMLParser.handle_comment  r   r   c                     g r   r   )r#   decls     r   rq   HTMLParser.handle_decl  r   r   c                     g r   r   r0   s     r   r|   HTMLParser.handle_pi  r   r   c                     g r   r   r0   s     r   unknown_declHTMLParser.unknown_decl  r   r   )__starttag_textr,   r   r+   r)   r(   )r
   )!__name__
__module____qualname____firstlineno____doc__r   r!   r"   r2   r6   r9   r:   rB   rE   r/   r_   rr   r^   r[   r   r\   r   r   r   ra   rd   rX   rv   rq   r|   r   __static_attributes____classcell__)r$   s   @r   r   r   P   s    * 1+/   O$Nu#t/*		(X8D%P 
 r   )r   r>   _markupbasehtmlr   html.entitiesr   r   __all__r?   r*   re   rc   r`   r   rZ   r{   commentcloser   r   VERBOSEr   r   r   r   r   
ParserBaser   r   r   r   <module>r      s   " 
   1 . ZZ' ZZ%
JJ>?	
**@
AzzSTzz+&
**S/zz)$ ::LM JJ=>   ZZ ) ZZ  JJsO	 ZZ>?

6J'' Jr   