Current File : //snap/core22/current/lib/python3.10/html/__pycache__/parser.cpython-310.pyc
o

E�h�G�@s�dZddlZddlZddlmZdgZe�d�Ze�d�Ze�d�Z	e�d�Z
e�d	�Ze�d
�Ze�d�Z
e�d�Ze�d
�Ze�d�Ze�dej�Ze�d�Ze�d�ZGdd�dej�ZdS)zA parser for HTML and XHTML.�N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]�>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@s�eZdZdZdZdd�dd�Zdd�Zd	d
�Zdd�Zd
Z	dd�Z
dd�Zdd�Zdd�Z
dd�Zd7dd�Zdd�Zdd�Zdd �Zd!d"�Zd#d$�Zd%d&�Zd'd(�Zd)d*�Zd+d,�Zd-d.�Zd/d0�Zd1d2�Zd3d4�Zd5d6�Zd
S)8raEFind tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )�script�styleT)�convert_charrefscCs||_|��dS)z�Initialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r�reset)�selfr�r
�"/usr/lib/python3.10/html/parser.py�__init__WszHTMLParser.__init__cCs(d|_d|_t|_d|_tj�|�dS)z1Reset this instance.  Loses all unprocessed data.�z???N)�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_markupbase�
ParserBaser�r	r
r
rr`s
zHTMLParser.resetcCs|j||_|�d�dS)z�Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        rN)r�goahead�r	�datar
r
r�feedhszHTMLParser.feedcCs|�d�dS)zHandle any buffered data.�N)rrr
r
r�closeqszHTMLParser.closeNcCs|jS)z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_textrr
r
r�get_starttag_textwszHTMLParser.get_starttag_textcCs$|��|_t�d|jtj�|_dS)Nz</\s*%s\s*>)�lowerr�re�compile�Ir)r	�elemr
r
r�set_cdata_mode{s
zHTMLParser.set_cdata_modecCst|_d|_dS�N)rrrrr
r
r�clear_cdata_modes
zHTMLParser.clear_cdata_modecCs|j}d}t|�}||k�rU|jr;|js;|�d|�}|dkr:|�dt||d��}|dkr8t�d��	||�s8�n|}n|j
�	||�}|rI|��}n|jrN�n|}||kro|jrf|jsf|�t
|||���n	|�|||��|�||�}||kr{�n�|j}|d|��r�t�||�r�|�|�}	n@|d|�r�|�|�}	n5|d|�r�|�|�}	n*|d|�r�|�|�}	n|d	|�r�|�|�}	n|d
|ks�|r�|�d�|d
}	n�n�|	dk�r�|sِn|t�||�r�n�|d|��r|d|kr�|�d�n�t�||�r�n�|�||dd��n~|d|��r0|}dD]}
|�|
|d
��r"|t|
�8}n�q|�||d
|��nS|d|��rB|�||dd��nA|||d���dk�r[|�||dd��n(|d	|��rm|�||dd��n|d|��r|�||dd��ntd��|}	|�||	�}n�|d|��r�t�||�}|�r�|� �dd�}|�!|�|�"�}	|d|	d
��s�|	d
}	|�||	�}q	d||d�v�r�|�|||d��|�||d�}ny|d|��rMt#�||�}|�r|� d
�}|�$|�|�"�}	|d|	d
��s|	d
}	|�||	�}q	t%�||�}|�r7|�r6|� �||d�k�r6|�"�}	|	|k�r.|}	|�||d
�}n|d
|k�rL|�d�|�||d
�}nnJd��||ks|�r�||k�r�|j�s�|j�ru|j�su|�t
|||���n	|�|||��|�||�}||d�|_dS)Nr�<�&�"z[\s;]�</�<!--�<?�<!r�)z--!z--�-�z	<![CDATA[��	�	<!doctype�we should not get here!z&#����;zinteresting.search() lied)&r�lenrr�find�rfind�maxrr �searchr�start�handle_datar�	updatepos�
startswith�starttagopen�match�parse_starttag�parse_endtag�
parse_comment�parse_pi�parse_html_declaration�
endtagopen�handle_comment�endswith�unknown_declr�handle_decl�	handle_pi�AssertionError�charref�group�handle_charref�end�	entityref�handle_entityref�
incomplete)r	rPr�i�n�j�ampposr@r>�k�suffix�namer
r
rr�s�
��







�




�}zHTMLParser.goaheadcCs�|j}|||d�dksJd��|||d�dkr |�|�S|||d�dkr/|�|�S|||d���d	krX|�d
|d�}|dkrIdS|�||d|��|dS|�|�S)
Nr-r,z+unexpected call to parse_html_declaration()r/r*r0z<![r1r2rr4r)rrC�parse_marked_sectionrr7rJ�parse_bogus_comment)r	rTr�gtposr
r
rrEs


z!HTMLParser.parse_html_declarationrcCs`|j}|||d�dvsJd��|�d|d�}|dkrdS|r,|�||d|��|dS)Nr-)r,r)z"unexpected call to parse_comment()rr4r)rr7rG)r	rT�reportr�posr
r
rr\'szHTMLParser.parse_bogus_commentcCsd|j}|||d�dksJd��t�||d�}|sdS|��}|�||d|��|��}|S)Nr-r+zunexpected call to parse_pi()r4)r�picloser:r;rKrP)r	rTrr@rVr
r
rrD3szHTMLParser.parse_picCs�d|_|�|�}|dkr|S|j}|||�|_g}t�||d�}|s(Jd��|��}|�d���|_}||kr�t	�||�}|sCnS|�ddd�\}	}
}|
sRd}n-|dd�dkrd|dd�ksyn|dd�dkrw|dd�krnn|dd�}|r�t
|�}|�|	��|f�|��}||ks:|||���}|d	vr�|�
�\}
}d
|jvr�|
|j�d
�}
t|j�|j�d
�}n|t|j�}|�|||��|S|�d�r�|�||�|S|�||�||jvr�|�|�|S)Nrrz#unexpected call to parse_starttag()r-r0�'r4�")r�/>�
rc)r�check_for_whole_start_tagr�tagfind_tolerantr@rPrNrr�attrfind_tolerantr�append�strip�getpos�countr6r8r<rH�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSr#)r	rT�endposr�attrsr@rX�tag�m�attrname�rest�	attrvaluerP�lineno�offsetr
r
rrA?sX
&(�

�
�

zHTMLParser.parse_starttagcCs�|j}t�||�}|rU|��}|||d�}|dkr|dS|dkr?|�d|�r-|dS|�d|�r5dS||kr;|S|dS|dkrEdS|dvrKdS||krQ|S|dStd	��)
Nrr�/rcr-r4r
z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZr3)r�locatestarttagend_tolerantr@rPr>rL)r	rTrrrrV�nextr
r
rrers.z$HTMLParser.check_for_whole_start_tagcCs*|j}|||d�dksJd��t�||d�}|sdS|��}t�||�}|sn|jdur9|�|||��|St�||d�}|sV|||d�dkrQ|dS|�	|�S|�
d���}|�d|���}|�
|�|dS|�
d���}|jdur�||jkr�|�|||��|S|�
|�|��|S)	Nr-r)zunexpected call to parse_endtagrr4r0z</>r)r�	endendtagr:rP�
endtagfindr@rr<rfr\rNrr7�
handle_endtagr%)r	rTrr@r]�	namematch�tagnamer"r
r
rrB�s8





zHTMLParser.parse_endtagcCs|�||�|�|�dSr$)rmr}�r	rqrpr
r
rrl�szHTMLParser.handle_startendtagcC�dSr$r
r�r
r
rrm��zHTMLParser.handle_starttagcCr�r$r
)r	rqr
r
rr}�r�zHTMLParser.handle_endtagcCr�r$r
�r	rZr
r
rrO�r�zHTMLParser.handle_charrefcCr�r$r
r�r
r
rrR�r�zHTMLParser.handle_entityrefcCr�r$r
rr
r
rr<�r�zHTMLParser.handle_datacCr�r$r
rr
r
rrG�r�zHTMLParser.handle_commentcCr�r$r
)r	�declr
r
rrJ�r�zHTMLParser.handle_declcCr�r$r
rr
r
rrK�r�zHTMLParser.handle_picCr�r$r
rr
r
rrI�r�zHTMLParser.unknown_decl)r)�__name__�
__module__�__qualname__�__doc__rnrrrrrrr#r%rrEr\rDrArerBrlrmr}rOrRr<rGrJrKrIr
r
r
rr?s:		
3"()r�rr�htmlr�__all__r rrSrQrMr?rFr`�commentcloserfrg�VERBOSEryr{r|rrr
r
r
r�<module>s.









��