o
    
zi                      @   s   d dl Z d dlZd dlmZmZ d dlmZmZmZ d dl	m
Z
 d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZ d d	lmZ d d
lmZmZ eeZ edZ!edZ"G dd dZ#dS )    N)	ContainerIterator)AnyBinaryIOClassVar)settings)PDFDocumentPDFNoPageLabelsPDFTextExtractionNotAllowed)PDFObjectNotFoundPDFValueError)	PDFParser)
dict_value	int_value
list_valueresolve1)LIT)Rect
parse_rectPagePagesc                   @   s   e Zd ZU dZdededededB ddf
dd	Zdefd
dZh dZ	e
ee  ed< ededed  fddZe					d%dedee dB dededededed  fddZdedefddZded edefd!d"Zdedee fd#d$ZdS )&PDFPageaz  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes
    ----------
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).

    docpageidattrslabelNreturnc                 C   s   || _ || _t|| _|| _t| jd| _t| jdi | _| 	| jd| _
| | jd| j
| _| | jd| _t| jddd d | _| jd	| _| jd
| _dS )zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxContentsRotater   ih  AnnotsBN)r   r   r   r   r   r   getlastmod	resources_parse_mediaboxmediabox_parse_cropboxcropbox_parse_contentscontentsr   rotateannotsbeads)selfr   r   r   r    r2   O/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/pdfminer/pdfpage.py__init__1   s   
zPDFPage.__init__c                 C   s   d| j d| jdS )Nz<PDFPage: Resources=z, MediaBox=>)r'   r)   )r1   r2   r2   r3   __repr__P   s   zPDFPage.__repr__>   r"   r    r   r   INHERITABLE_ATTRSdocumentc           	      #   s6   	 ddt dttt f dtt  d B dttttt tt t f f f  f fddz }W n ty>   t	
d }Y nw d}djv rbjd j}|D ]\}} ||t|V  d	}qQ|sjD ]1}| D ](}z|}t|tr|d
tu r ||t|V  W qm ty   Y qmw qgd S d S )Nobjparentvisitedr   c           	      3   s   t | tr| }t| }n	| j}t|  }|d u r"t }||v r(d S || | D ]\}}| j	v rB||vrB|||< q1|
d}|d u rTtjsT|
d}|tu rxd|v rxtd|d  t|d D ]}|||E d H  qjd S |tu rtd| ||fV  d S d S )NTypetypeKidszPages: Kids=%rzPage: %r)
isinstanceintr   getobjcopyobjidsetadditemsr7   r%   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)	r9   r:   r;   	object_idobject_propertieskvobject_typechildclsdepth_first_searchr8   r2   r3   rT   \   s6   



z0PDFPage.create_pages.<locals>.depth_first_searchFr   Tr<   N)r   dictstrrD   r   tupler@   get_page_labelsr	   	itertoolsrepeatcatalognextxrefs
get_objidsrA   r?   r%   rK   r   )	rS   r8   page_labelspagesobjectsrC   treexrefr9   r2   rR   r3   create_pagesZ   sH   

&


zPDFPage.create_pagesr    TFfppagenosmaxpagespasswordcachingcheck_extractablec                 c   s    t |}t|||d}|js%|rd|}	t|	d|d}
t|
 t| |D ]\}}|r7||vr7q,|V  |rE||d krE d S q,d S )N)rj   rk   z Text extraction is not allowed: zThe PDF z contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case   )r   r   is_extractabler
   rI   warning	enumeratere   )rS   rg   rh   ri   rj   rk   rl   parserr   	error_msgwarning_msgpagenopager2   r2   r3   	get_pages   s$   


zPDFPage.get_pagesvaluec                 C   sT   d}|d u rt d |S ztdd t|D W S  ty)   t d | Y S w )N)        rx   g      @g     @zHMediaBox missing from /Page (and not inherited), defaulting to US Letterc                 s       | ]}t |V  qd S rU   r   .0valr2   r2   r3   	<genexpr>       z*PDFPage._parse_mediabox.<locals>.<genexpr>z2Invalid MediaBox in /Page, defaulting to US Letter)rI   ro   r   r   r   )r1   rw   	us_letterr2   r2   r3   r(      s   
zPDFPage._parse_mediaboxr)   c                 C   sF   |d u r|S zt dd t|D W S  ty"   td | Y S w )Nc                 s   ry   rU   rz   r{   r2   r2   r3   r~      r   z)PDFPage._parse_cropbox.<locals>.<genexpr>z0Invalid CropBox in /Page, defaulting to MediaBox)r   r   r   rI   ro   )r1   rw   r)   r2   r2   r3   r*      s   
zPDFPage._parse_cropboxc                 C   s(   g }|d urt |}t|ts|g}|S rU   )r   r?   list)r1   rw   r-   r2   r2   r3   r,      s   
zPDFPage._parse_contents)Nr   rf   TF)__name__
__module____qualname____doc__r   objectrW   r4   r6   r7   r   rD   __annotations__classmethodr   re   r   r   r@   boolrv   r   r   r(   r*   r   r,   r2   r2   r2   r3   r      sR   
 
=
$r   )$rZ   loggingcollections.abcr   r   typingr   r   r   pdfminerr   pdfminer.pdfdocumentr   r	   r
   pdfminer.pdfexceptionsr   r   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   pdfminer.utilsr   r   	getLoggerr   rI   rK   rH   r   r2   r2   r2   r3   <module>   s    
