o
    ThM                     @   s,  d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	m
Z
mZmZmZmZ ddlmZmZmZ ddlmZmZmZmZmZ ddlmZmZmZ ddlZG d	d
 d
eZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$G dd deZ%dS )z#Tests of Beautiful Soup as a whole.    )	set_traceN)BeautifulSoupBeautifulStoneSoupGuessedAtParserWarningMarkupResemblesLocatorWarningdammit)builder_registryTreeBuilderParserRejectedMarkup)CommentSoupStrainerPYTHON_SPECIFIC_ENCODINGSTagNavigableString   )default_builderLXML_PRESENTSoupTestc                   @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )TestConstructorc                 C   "   d}|  |}d|jjksJ d S )Nu   <h1>éé</h1>u   éésouph1stringselfdatar    r   S/home/air/segue/gpt/backup/venv/lib/python3.10/site-packages/bs4/tests/test_soup.pytest_short_unicode_input)      
z(TestConstructor.test_short_unicode_inputc                 C   r   )Nz<h1>foo bar</h1>zfoo barr   r   r   r   r   test_embedded_null.   r    z"TestConstructor.test_embedded_nullc                 C   s,   d d}| j|dgd}d|jksJ d S )Nu   Räksmörgåsutf-8)exclude_encodingszwindows-1252)encoder   original_encoding)r   	utf8_datar   r   r   r   test_exclude_encodings3   s   
z&TestConstructor.test_exclude_encodingsc                 C   s  G dd dt }tddd}tjdd tdd|i|}W d    n1 s(w   Y  t|j|s5J tdd	|jjks@J d
|jjksHJ |di |}tjdd}td|dd}W d    n1 sgw   Y  t	|d j
}|dszJ ||jksJ ||jksJ d S )Nc                   @   sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Ze ZZ	dd Z
dS )z7TestConstructor.test_custom_builder_class.<locals>.Mockc                 [   s(   || _ d| _d| _g | _g | _i | _d S )NTF)called_withis_xmlstore_line_numberscdata_list_attributespreserve_whitespace_tagsstring_containers)r   kwargsr   r   r   __init__<   s   
z@TestConstructor.test_custom_builder_class.<locals>.Mock.__init__c                 S      d S Nr   r   r   r   r   r   initialize_soupC      zGTestConstructor.test_custom_builder_class.<locals>.Mock.initialize_soupc                 S   s
   || _ d S r1   )fed)r   markupr   r   r   feedE   s   
z<TestConstructor.test_custom_builder_class.<locals>.Mock.feedc                 S   r0   r1   r   r   r   r   r   resetG   r4   z=TestConstructor.test_custom_builder_class.<locals>.Mock.resetc                 S   r0   r1   r   )r   ignorer   r   r   r:   I   r4   z>TestConstructor.test_custom_builder_class.<locals>.Mock.ignorec                    s    dV  d S )N)prepared markupzoriginal encodingzdeclared encodingzcontains replacement charactersr   r   argsr.   r   r   r   prepare_markupL   s   
zFTestConstructor.test_custom_builder_class.<locals>.Mock.prepare_markupN)__name__
__module____qualname__r/   r3   r7   r9   r:   set_up_substitutionscan_be_empty_elementr>   r   r   r   r   Mock;   s    rD   valueT)varconvertEntitiesrecord builder)rF   r;   )rK   ignored_valuer   zCKeyword arguments to the BeautifulSoup constructor will be ignored.)rJ   r   )objectdictwarningscatch_warningsr   
isinstancerK   r(   r5   strmessage
startswith)r   rD   r.   r   rK   wmsgr   r   r   test_custom_builder_class8   s*   z)TestConstructor.test_custom_builder_classc                 C   sl   G dd dt }dd }dd l}tt}td|d W d    n1 s&w   Y  dt|jv s4J d S )	Nc                   @      e Zd Zdd ZdS )z:TestConstructor.test_parser_markup_rejection.<locals>.Mockc                 _   s   t d)NzNope.)r
   r<   r   r   r   r7   l   s   z?TestConstructor.test_parser_markup_rejection.<locals>.Mock.feedN)r?   r@   rA   r7   r   r   r   r   rD   k   s    rD   c                    s"    t d d dfV  t d d dfV  d S )NF)r6   r<   r   r   r   r>   o   s   zDTestConstructor.test_parser_markup_rejection.<locals>.prepare_markupr   rJ   )rK   zoThe markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.)r	   repytestraisesr
   r   rR   rE   )r   rD   r>   rY   exc_infor   r   r   test_parser_markup_rejectionh   s   z,TestConstructor.test_parser_markup_rejectionc              	   C   s   d}|  |}|j}d|d ksJ ddg|d ksJ | j |td d}d|jd ks-J ddiddifD ]5}tjd	d
}| j |d |d}W d    n1 sPw   Y  |j}ddg|d ksbJ d|d ksjJ q5d S )Nz&<a id=" an id " class=" a class "></a>z an id idaclass)rK   multi_valued_attributesz	 a class *TrH   an)r   r_   r   rO   rP   )r   r6   r   r_   
switcheroorU   r   r   r   test_cdata_list_attributes{   s   
z*TestConstructor.test_cdata_list_attributesc                    sp   G dd dt G dd dtG dd dt | jdt tt id}t fd	d
| D s6J d S )Nc                   @      e Zd ZdS )z9TestConstructor.test_replacement_classes.<locals>.TagPlusNr?   r@   rA   r   r   r   r   TagPlus       rh   c                   @   rf   )z<TestConstructor.test_replacement_classes.<locals>.StringPlusNrg   r   r   r   r   
StringPlus   ri   rj   c                   @   rf   )z=TestConstructor.test_replacement_classes.<locals>.CommentPlusNrg   r   r   r   r   CommentPlus   ri   rk   z<a><b>foo</b>bar</a><!--whee-->)element_classesc                 3   s     | ]}t | fV  qd S r1   )rQ   .0xrk   rj   rh   r   r   	<genexpr>   s
    
z;TestConstructor.test_replacement_classes.<locals>.<genexpr>)r   r   r   r   allrecursiveChildGeneratorr2   r   rp   r   test_replacement_classes   s   z(TestConstructor.test_replacement_classesc                 C   s   G dd dt }G dd dt }| jd||dd}t|jjd t s%J t|jjd |s0J |jjD ]	}t||s=J q4g |jksEJ d S )	Nc                   @   rf   )zATestConstructor.test_alternate_string_containers.<locals>.PStringNrg   r   r   r   r   PString   ri   ru   c                   @   rf   )zATestConstructor.test_alternate_string_containers.<locals>.BStringNrg   r   r   r   r   BString   ri   rv   z4<div>Hello.<p>Here is <b>some <i>bolded</i></b> text)bp)r-   r   )	r   r   rQ   divcontentsrx   rw   stringsstring_container_stack)r   ru   rv   r   sr   r   r    test_alternate_string_containers   s   	z0TestConstructor.test_alternate_string_containersN)r?   r@   rA   r   r!   r'   rW   r]   re   rt   r~   r   r   r   r   r   '   s    0r   c                   @   sT   e Zd Zejdddgdd Zejddd eD d	g d
d Zdd Z	d	S )
TestOutputz!eventual_encoding,actual_encoding)r"   r"   )utf-16r   c                 C   s0   |  d}d|_d| d|j|dksJ d S )N<tag></tag>Tz<?xml version="1.0" encoding="z"?>
<tag></tag>eventual_encoding)r   r)   decode)r   r   actual_encodingr   r   r   r   test_decode_xml_declaration   s
   


z&TestOutput.test_decode_xml_declarationr   c                 C   s   g | ]}|qS r   r   rm   r   r   r   
<listcomp>   s    zTestOutput.<listcomp>Nc                 C   s(   t dd}d|_d|j|dksJ d S )Nr   html.parserTz!<?xml version="1.0"?>
<tag></tag>r   )r   r)   r   )r   r   r   r   r   r   Mtest_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding   s
   

zXTestOutput.test_decode_xml_declaration_with_missing_or_python_internal_eventual_encodingc                 C   sV   |  d}d|jddksJ d|jddksJ d| ks!J d| ks)J d S )Nr   s   <tag></tag>r"   )encodingz<tag>
</tag>
)r   r$   encode_contentsdecode_contentsprettifyr2   r   r   r   test   s
   
zTestOutput.test)
r?   r@   rA   rZ   markparametrizer   r   r   r   r   r   r   r   r      s    


	r   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
ejdg ddd Zejdg ddd Zdd Zdd Zdd Zdd Zd S )!TestWarningsc                 C   s<   |D ]}t |j|r|jtksJ |  S qtd||f )Nz%s warning not found in %r)rQ   rS   filename__file__	Exception)r   rO   clsrU   r   r   r   _assert_warning  s   zTestWarnings._assert_warningc                 C   s2   |  |t}t|j}|tjd d sJ d S )N<   )r   r   rR   rS   rT   r   NO_PARSER_SPECIFIED_WARNING)r   rU   warningrS   r   r   r   _assert_no_parser_specified  s   
z(TestWarnings._assert_no_parser_specifiedc                 C   sB   t jdd}td}W d    n1 sw   Y  | | d S NTrH   <a><b></b></a>rO   rP   r   r   r   rU   r   r   r   r   #test_warning_if_no_parser_specified  s   
z0TestWarnings.test_warning_if_no_parser_specifiedc                 C   sD   t jdd}tdd}W d    n1 sw   Y  | | d S )NTrH   r   htmlr   r   r   r   r   *test_warning_if_parser_specified_too_vague  s   z7TestWarnings.test_warning_if_parser_specified_too_vaguec                 C   sF   t jdd}| d}W d    n1 sw   Y  g |ks!J d S r   rO   rP   r   r   r   r   r   ,test_no_warning_if_explicit_parser_specified  s   z9TestWarnings.test_no_warning_if_explicit_parser_specifiedc                 C   s   t jdd}tddtdd}W d    n1 sw   Y  | |t}t|j}d|v s0J d|v s6J d	| ks>J d S )
NTrH   r   r   rw   )parseOnlyTheser   
parse_onlys   <b></b>)	rO   rP   r   r   r   DeprecationWarningrR   rS   r$   )r   rU   r   r   rV   r   r   r   )test_parseOnlyThese_renamed_to_parse_only"  s   
z6TestWarnings.test_parseOnlyThese_renamed_to_parse_onlyc                 C   s~   t jdd}d}t|ddd}W d    n1 sw   Y  | |t}t|j}d|v s0J d|v s6J d|jks=J d S )	NTrH   s   ér   utf8)fromEncodingr   from_encoding)rO   rP   r   r   r   rR   rS   r%   )r   rU   r   r   r   rV   r   r   r   *test_fromEncoding_renamed_to_from_encoding.  s   
z7TestWarnings.test_fromEncoding_renamed_to_from_encodingc                 C   s>   t t | jddd W d    d S 1 sw   Y  d S )Nz<a>T)no_such_argument)rZ   r[   	TypeErrorr   r8   r   r   r   "test_unrecognized_keyword_argument:  s   "z/TestWarnings.test_unrecognized_keyword_argument	extension)zmarkup.htmlz
markup.htmzmarkup.HTMLz
markup.txtzmarkup.xhtmlz
markup.xmlz/home/user/filezc:\userilec                 C   s^   t jdd}td| d}| |t}dt|jv sJ W d    d S 1 s(w   Y  d S )NTrH   r6   r   zlooks more like a filenamerO   rP   r   r   r   rR   rS   )r   r   rU   r   r   r   r   r   test_resembles_filename_warning>  s
   "z,TestWarnings.test_resembles_filename_warning)
markuphtmlz
markup.comrJ   z	markup.jsc                 C   sJ   t jdd}| d| }W d    n1 sw   Y  g |ks#J d S )NTrH   r6   r   )r   r   rU   r   r   r   r   "test_resembles_filename_no_warningK  s   z/TestWarnings.test_resembles_filename_no_warningc                 C   st   d}t jdd}t|d}W d    n1 sw   Y  | |t}dt|jv s,J |t|jdvs8J d S )Ns   http://www.crummybytes.com/TrH   r   looks more like a URLr   )rO   rP   r   r   r   rR   rS   r$   r   urlwarning_listr   r   r   r   r   test_url_warning_with_bytes_urlW  s   z,TestWarnings.test_url_warning_with_bytes_urlc                 C   sn   d}t jdd}t|d}W d    n1 sw   Y  | |t}dt|jv s,J |t|jvs5J d S )Nzhttp://www.crummyunicode.com/TrH   r   r   r   r   r   r   r   !test_url_warning_with_unicode_urla  s   z.TestWarnings.test_url_warning_with_unicode_urlc                 C   P   t jdd}| d}W d    n1 sw   Y  tdd |D r&J d S )NTrH   s$   http://www.crummybytes.com/ is greatc                 s       | ]
}d t |jv V  qdS r   NrR   rS   rn   rU   r   r   r   rq   r      zETestWarnings.test_url_warning_with_bytes_and_space.<locals>.<genexpr>rO   rP   r   anyr   r   r   r   r   r   %test_url_warning_with_bytes_and_spacem  s   z2TestWarnings.test_url_warning_with_bytes_and_spacec                 C   r   )NTrH   z&http://www.crummyunicode.com/ is greatc                 s   r   r   r   r   r   r   r   rq   x  r   zGTestWarnings.test_url_warning_with_unicode_and_space.<locals>.<genexpr>r   r   r   r   r   'test_url_warning_with_unicode_and_spaceu  s   z4TestWarnings.test_url_warning_with_unicode_and_spaceN)r?   r@   rA   r   r   r   r   r   r   r   r   rZ   r   r   r   r   r   r   r   r   r   r   r   r   r      s.    


r   c                   @   rX   )TestSelectiveParsingc                 C   s.   d}t d}| j||d}| dksJ d S )Nz&No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>rw   )r   s   <b>Yes</b><b>Yes <c>Yes</c></b>)r   r   r$   )r   r6   strainerr   r   r   r   test_parse_with_soupstrainer~  s   z1TestSelectiveParsing.test_parse_with_soupstrainerN)r?   r@   rA   r   r   r   r   r   r   |  s    r   c                   @   s:   e Zd ZdZdd Zejje dddd Z	dd	 Z
d
S )
TestNewTagz(Test the BeautifulSoup.new_tag() method.c                 C   sb   |  d}|jddddid}t|tsJ d|jksJ tddd|jks(J d |jks/J d S )NrJ   foobaznameza name)barattrs)r   r   )r   new_tagrQ   r   r   rN   r   parent)r   r   r   r   r   r   test_new_tag  s   
zTestNewTag.test_new_tagz-lxml not installed, cannot parse XML document)reasonc                 C   B   t dd}|d}|d}d| ksJ d| ksJ d S )NrJ   xmlbrrx      <br/>s   <p/>r   r   r$   )r   xml_soupxml_brxml_pr   r   r   5test_xml_tag_inherits_self_closing_rules_from_builder  s
   


z@TestNewTag.test_xml_tag_inherits_self_closing_rules_from_builderc                 C   r   )NrJ   r   r   rx   r   s   <p></p>r   )r   	html_souphtml_brhtml_pr   r   r   1test_tag_inherits_self_closing_rules_from_builder  s
   


z<TestNewTag.test_tag_inherits_self_closing_rules_from_builderN)r?   r@   rA   __doc__r   rZ   r   skipifr   r   r   r   r   r   r   r     s    

r   c                   @   s    e Zd ZdZdd Zdd ZdS )TestNewStringz+Test the BeautifulSoup.new_string() method.c                 C   s2   |  d}|d}d|ksJ t|tsJ d S NrJ   r   )r   
new_stringrQ   r   r   r   r}   r   r   r   'test_new_string_creates_navigablestring  s   

z5TestNewString.test_new_string_creates_navigablestringc                 C   s4   |  d}|dt}d|ksJ t|tsJ d S r   )r   r   r   rQ   r   r   r   r   3test_new_string_can_create_navigablestring_subclass  s   
zATestNewString.test_new_string_can_create_navigablestring_subclassN)r?   r@   rA   r   r   r   r   r   r   r   r     s    r   c                   @   s   e Zd Zdd Zdd ZdS )
TestPicklec                 C   s2   |  d}t|}t|}d|jjksJ d S )Nz<a>some markup</a>some markup)r   pickledumpsloadsr_   r   r   r   pickled	unpickledr   r   r   test_normal_pickle  s   


zTestPickle.test_normal_picklec                 C   s6   |  d}d |_t|}t|}d|jksJ d S )Nr   )r   rK   r   r   r   r   r   r   r   r   test_pickle_with_no_builder  s
   


z&TestPickle.test_pickle_with_no_builderN)r?   r@   rA   r   r   r   r   r   r   r     s    r   c                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )TestEncodingConversionc                 C   s&   d| _ | j d| _| jdksJ d S )NuU   <html><head><meta charset="utf-8"/></head><body><foo>Sacré bleu!</foo></body></html>r"   sU   <html><head><meta charset="utf-8"/></head><body><foo>Sacré bleu!</foo></body></html>)unicode_datar$   r&   r8   r   r   r   setup_method  s   z#TestEncodingConversion.setup_methodc              	   C   s   t j}ttj z9dd }|t _d}| |}| }t|ts#J || 	| ks.J |j
 dks7J W ttj |t _d S ttj |t _w )Nc                 S   r0   r1   r   )rR   r   r   r   noop  r4   z>TestEncodingConversion.test_ascii_in_unicode_out.<locals>.noops   <foo>a</foo>r"   )r   chardet_dammitloggingdisableWARNINGr   r   rQ   rR   document_forr%   lowerNOTSET)r   chardetr   asciisoup_from_asciiunicode_outputr   r   r   test_ascii_in_unicode_out  s   

z0TestEncodingConversion.test_ascii_in_unicode_outc                 C   s@   |  | j}| | jksJ |jjdksJ |jd ksJ d S Nu   Sacré bleu!)r   r   r   r   r   r%   r   soup_from_unicoder   r   r   test_unicode_in_unicode_out  s   z2TestEncodingConversion.test_unicode_in_unicode_outc                 C   s2   |  | j}| | jksJ |jjdksJ d S r  )r   r&   r   r   r   r   )r   soup_from_utf8r   r   r   test_utf8_in_unicode_out  s   z/TestEncodingConversion.test_utf8_in_unicode_outc                 C   s$   |  | j}|d| jksJ d S )Nr"   )r   r   r$   r&   r  r   r   r   test_utf8_out  s   z$TestEncodingConversion.test_utf8_outN)r?   r@   rA   r   r   r  r  r  r   r   r   r   r     s    r   )&r   pdbr   r   osr   rZ   systempfilebs4r   r   r   r   r   bs4.builderr   r	   r
   bs4.elementr   r   r   r   r   rJ   r   r   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s,    0)}	"