o
    
zi                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d d	l(m)Z) d d
l*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z? e@eAZBG dd de'ZCG dd deCZDe
de	ee5ZEG dd deCeeE ZFG dd deFe5 ZGG dd deFe5 ZHG dd deFe5 ZIG dd deFe5 ZJdS )    N)Sequence)BinaryIOClassVarGenericTextIOTypeVarcast)utils)ImageWriter)LAParamsLTAnnoLTCharLTComponentLTContainerLTCurveLTFigureLTImageLTItemLTLayoutContainerLTLineLTPageLTRectLTText	LTTextBoxLTTextBoxVerticalLTTextGroup
LTTextLineTextGroupElement)PDFColorSpace)PDFTextDevice)PDFValueError)PDFFontPDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)AnyIOMatrixPathSegmentPointRectapply_matrix_ptapply_matrix_rectbbox2strencmake_compat_strmult_matrixc                   @   s(  e Zd ZU eed< eed< 		d0dedededB ddfd	d
Z	de
deddfddZde
ddfddZdedededdfddZdeddfddZdededdfddZdededededee ddfd d!Zded"ed#ed$ed%ed&ed'ed(edefd)d*Zd"ed&edefd+d,Zd-eddfd.d/ZdS )1PDFLayoutAnalyzercur_itemctm   Nrsrcmgrpagenolaparamsreturnc                 C   s"   t | | || _|| _g | _d S N)r   __init__r7   r8   _stackselfr6   r7   r8    r?   Q/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/pdfminer/converter.pyr;   A   s   
zPDFLayoutAnalyzer.__init__pagec                 C   sB   t ||j\}}}}ddt|| t|| f}t| j|| _d S )Nr   )r-   mediaboxabsr   r7   r3   )r>   rA   r4   x0y0x1y1rB   r?   r?   r@   
begin_pageL   s   zPDFLayoutAnalyzer.begin_pagec                 C   sl   | j rJ tt| j t| jtsJ tt| j| jd ur'| j| j |  j	d7  _	| 
| j d S )Nr5   )r<   strlen
isinstancer3   r   typer8   analyzer7   receive_layout)r>   rA   r?   r?   r@   end_pageQ   s   
zPDFLayoutAnalyzer.end_pagenamebboxmatrixc                 C   s(   | j | j t||t|| j| _d S r:   )r<   appendr3   r   r1   r4   )r>   rP   rQ   rR   r?   r?   r@   begin_figureY   s   zPDFLayoutAnalyzer.begin_figure_c                 C   s@   | j }t| j tsJ tt| j | j | _ | j | d S r:   )r3   rK   r   rI   rL   r<   popadd)r>   rU   figr?   r?   r@   
end_figure]   s   zPDFLayoutAnalyzer.end_figurestreamc                 C   sR   t | jtsJ tt| jt||| jj| jj| jj| jj	f}| j
| d S r:   )rK   r3   r   rI   rL   r   rD   rE   rF   rG   rW   )r>   rP   rZ   itemr?   r?   r@   render_imagec   s   zPDFLayoutAnalyzer.render_imagegstatestrokefillevenoddpathc                    s  d dd  D }|dd dkrdS |ddkr;td|D ]} |d|d }||||| q!dS  fd	d
 D }	fdd
|	D }
dd
  D }fdd
 D }dd
 t||ddD }t|dkr|dd dkr|
d |
d kr|dd d }|
	  |dv rt
|j|
d |
d ||||j|j||jd
}j| dS |dv r(|
\\}}\}}\}}\}}}|
d |
d k}||ko||ko||ko||kp||ko||ko||ko||k}|r|rt|jg |
d |
d R ||||j|j||j	}j| dS t|j|
||||j|j||j	}j| dS t|j|
||||j|j||j	}j| dS )z@Paint paths described in section 4.4 of the PDF reference manual c                 s   s    | ]}|d  V  qdS )r   Nr?   ).0xr?   r?   r@   	<genexpr>u   s    z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>Nr5   mzm[^m]+r   c                    s:   g | ]}t t|d  dkr|dd n d  dd qS )r   hN)r   r*   )rc   p)ra   r?   r@   
<listcomp>   s    .z0PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>c                    s   g | ]}t  j|qS r?   )r,   r4   )rc   ptr>   r?   r@   rj          c                 S   s   g | ]}t |d  qS )r   )rI   rc   	operationr?   r?   r@   rj      rm   c              	      s<   g | ]} fd dt |ddd |ddd ddD qS )c                    s(   g | ]\}}t  jt|t|fqS r?   )r,   r4   float)rc   operand1operand2rl   r?   r@   rj      s    z;PDFLayoutAnalyzer.paint_path.<locals>.<listcomp>.<listcomp>r5   N   Fstrict)ziprn   rl   r?   r@   rj      s    
c                 S   s"   g | ]\}}t t|g|R qS r?   )r   r)   )rc   ori   r?   r?   r@   rj      s    Frt      rh   lhrg   >   mlmlh)original_pathdashing_style>   mlllhmllll   rs   )joincountrefinditerstartend
paint_pathrv   rJ   rV   r   	linewidthscolorncolordashr3   rW   r   r   )r>   r]   r^   r_   r`   ra   shaperf   subpathraw_ptspts	operatorstransformed_pointstransformed_pathlinerD   rE   rF   rG   x2y2x3y3rU   is_closed_loophas_square_coordinatesrectcurver?   )ra   r>   r@   r   l   s   	

	,
zPDFLayoutAnalyzer.paint_pathfontfontsizescalingrisecidncsgraphicstatec	                 C   s   z| |}	t|	tsJ tt|	W n ty#   | ||}	Y nw ||}
||}t||||||	|
|||
}| j	
| |jS r:   )	to_unichrrK   rI   rL   r"   handle_undefined_char
char_width	char_dispr   r3   rW   adv)r>   rR   r   r   r   r   r   r   r   text	textwidthtextdispr[   r?   r?   r@   render_char   s,   


zPDFLayoutAnalyzer.render_charc                 C   s   t d|| d| dS )Nzundefined: %r, %rz(cid:))logdebug)r>   r   r   r?   r?   r@   r   
  s   z'PDFLayoutAnalyzer.handle_undefined_charltpagec                 C      d S r:   r?   r>   r   r?   r?   r@   rN     s   z PDFLayoutAnalyzer.receive_layoutr5   N)__name__
__module____qualname__r   __annotations__r(   r$   intr   r;   r%   rH   rO   rI   r+   rT   rY   r&   r\   r#   boolr   r)   r   r!   rp   r   r   r   r   rN   r?   r?   r?   r@   r2   =   sj   
 
	
}	

!r2   c                	   @   sP   e Zd Z		ddedededB ddfddZd	eddfd
dZdefddZ	dS )PDFPageAggregatorr5   Nr6   r7   r8   r9   c                 C   s   t j| |||d d | _d S N)r7   r8   )r2   r;   resultr=   r?   r?   r@   r;     s   
zPDFPageAggregator.__init__r   c                 C   s
   || _ d S r:   r   r   r?   r?   r@   rN     s   
z PDFPageAggregator.receive_layoutc                 C   s   | j d usJ | j S r:   r   rl   r?   r?   r@   
get_result  s   zPDFPageAggregator.get_resultr   )
r   r   r   r$   r   r   r;   r   rN   r   r?   r?   r?   r@   r     s    
	r   IOTypec                   @   sP   e Zd Z			ddedededededB d	dfd
dZe	de
d	efddZdS )PDFConverterutf-8r5   Nr6   outfpcodecr7   r8   r9   c                 C   s0   t j| |||d || _|| _| | j| _d S r   )r2   r;   r   r   _is_binary_streamoutfp_binary)r>   r6   r   r   r7   r8   r?   r?   r@   r;   )  s   zPDFConverter.__init__c                 C   sL   dt | ddv r
dS t| drdS t| tjrdS t| tjtjfr$dS dS )z"Test if an stream is binary or notbmoderb   TF)getattrhasattrrK   ioBytesIOStringIO
TextIOBase)r   r?   r?   r@   r   6  s   
zPDFConverter._is_binary_stream)r   r5   N)r   r   r   r$   r   rI   r   r   r;   staticmethodr'   r   r   r?   r?   r?   r@   r   (  s&    
r   c                       s   e Zd Z					d dedededed	edB d
ede	dB ddf fddZ
deddfddZdeddfddZdededdfddZdededededee ddfddZ  ZS )!TextConverterr   r5   NFr6   r   r   r7   r8   
showpagenoimagewriterr9   c                    s&   t  j|||||d || _|| _d S )Nr   r7   r8   )superr;   r   r   )r>   r6   r   r   r7   r8   r   r   	__class__r?   r@   r;   G  s   

zTextConverter.__init__r   c                 C   sF   t || jd}| jrtt| j|  d S tt	| j| d S )Nignore)
r	   compatible_encode_methodr   r   r   r   r   writeencoder   r>   r   r?   r?   r@   
write_textU  s   zTextConverter.write_textr   c                    sH   dt dd f fdd jrd|j d  | d d S )Nr[   r9   c                    sz   t | tr| D ]} | qnt | tr|   t | tr'd d S t | tr9jd ur;j|  d S d S d S )N
)	rK   r   r   r   get_textr   r   r   export_imager[   childrenderr>   r?   r@   r   ]  s   



z,TextConverter.receive_layout.<locals>.renderzPage r   )r   r   r   pageidr   r?   r   r@   rN   \  s
   zTextConverter.receive_layoutrP   rZ   c                 C   s    | j d urt| || d S d S r:   )r   r   r\   )r>   rP   rZ   r?   r?   r@   r\   p  s   
zTextConverter.render_imager]   r^   r_   r`   ra   c                 C   r   r:   r?   )r>   r]   r^   r_   r`   ra   r?   r?   r@   r   t  s   zTextConverter.paint_path)r   r5   NFN)r   r   r   r$   r'   rI   r   r   r   r
   r;   r   r   rN   r&   r\   r#   r   r)   r   __classcell__r?   r?   r   r@   r   F  sN    	r   c                   @   s  e Zd ZU dddddddZeeeef  ed< d	dd
Zeeeef  ed< 												dOde	de
dedededB dededededededB ded eeef dB d!eeef dB d"dfd#d$Zd%ed"dfd&d'ZdPd(d)ZdPd*d+Zd%ed"dfd,d-Zd.ed/ed0ed1ed2ed3ed"dfd4d5Zd.ed/ed6ed"dfd7d8Zd6ed/ed0ed1ed2ed3ed"dfd9d:Zd.ed%ed0ed1ed;ed"dfd<d=Z	>dQd.ed/ed0ed1ed2ed3ed?ed"dfd@dAZd.ed"dfdBdCZd%edDedEed"dfdFdGZdPdHdIZdJed"dfdKdLZ dPdMdNZ!dS )RHTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rA   RECT_COLORSblue)r   charTEXT_COLORSr   r5   N      ?normalT2   r   r6   r   r   r7   r8   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr9   c                 C   s   t j| |||||d | jr| jstd| js| jrtd|d u r'ddi}|d u r0ddd}|| _|| _|| _|	| _|
| _	|| _
|| _|| _|rX| j| j | j| j | j	| _d | _g | _|   d S )Nr   )Codec is required for a binary I/O outputz1Codec must not be specified for a text I/O outputr   r   r   )r   rA   )r   r;   r   r   r    r   r   r   r   r   r   r   r   updater   r   _yoffset_font
_fontstackwrite_header)r>   r6   r   r   r7   r8   r   r   r   r   r   r   r   r   r   r?   r?   r@   r;     s>   

zHTMLConverter.__init__r   c                 C   :   | j rtt| j|| j  d S tt| j| d S r:   r   r   r   r   r   r   r   r   r?   r?   r@   r        zHTMLConverter.writec                 C   s<   |  d | jrd| j d}nd}|  | |  d d S )Nz<html><head>
z<<meta http-equiv="Content-Type" content="text/html; charset=">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )r>   sr?   r?   r@   r    s   

zHTMLConverter.write_headerc                 C   s@   dd t d| jD }dd| d}| | | d d S )Nc                 S   s   g | ]}d | d| dqS )z
<a href="#">z</a>r?   )rc   ir?   r?   r@   rj     s    z.HTMLConverter.write_footer.<locals>.<listcomp>r5   z/<div style="position:absolute; top:0px;">Page: z, </div>
z</body></html>
)ranger7   r   r   )r>   
page_linksr  r?   r?   r@   write_footer  s   
zHTMLConverter.write_footerc                 C   s   |  t| d S r:   )r   r/   r   r?   r?   r@   r     s   zHTMLConverter.write_textcolorborderwidthrd   ywrg   c           	      C   sn   | j |}|d ur5d| d| d|| j  d| j| | j  d|| j  d|| j  d}| | d S d S )Nz(<span style="position:absolute; border:  zpx solid; left:px; top:
px; width:px; height:zpx;"></span>
)r   getr   r   r   )	r>   r  r  rd   r  r  rg   color2r  r?   r?   r@   
place_rect  s$   	zHTMLConverter.place_rectr[   c                 C   s    |  |||j|j|j|j d S r:   )r  rD   rG   widthheight)r>   r  r  r[   r?   r?   r@   place_border  s    zHTMLConverter.place_borderc           	      C   st   | j d ur8| j |}dt| d| d|| j  d| j| | j  d|| j  d|| j  d}| | d S d S )Nz
<img src="z
" border="z!" style="position:absolute; left:r  zpx;" width="
" height="" />
)r   r   r/   r   r   r   )	r>   r[   r  rd   r  r  rg   rP   r  r?   r?   r@   place_image  s   
	zHTMLConverter.place_imagesizec              	   C   sv   | j |}|d ur9d| d|| j  d| j| | j  d|| j | j  d	}| | | | | d d S d S )Nz&<span style="position:absolute; color:; left:r  zpx; font-size:px;"></span>
)r   r  r   r   r   r   r   )r>   r  r   rd   r  r  r  r  r?   r?   r@   
place_text  s    

zHTMLConverter.place_textFalsewriting_modec           	      C   sp   | j | j d | _d| d| d| d|| j  d| j| | j  d|| j  d|| j  d}| | d S )	Nz'<div style="position:absolute; border: r  zpx solid; writing-mode:r   r  r  r  r!  )r  rS   r   r   r   r   )	r>   r  r  rd   r  r  rg   r%  r  r?   r?   r@   	begin_div#  s&   
	zHTMLConverter.begin_divc                 C   s.   | j d ur
| d | j | _ | d d S )N</span>z</div>)r   r   r  rV   )r>   r  r?   r?   r@   end_div:  s   

zHTMLConverter.end_divfontnamer   c                 C   sl   ||f}|| j kr/| j d ur| d |dd }| d| d|| j | j  d || _ | | d S )Nr'  +z<span style="font-family: z; font-size:zpx">)r   r   splitr   r   r   )r>   r   r)  r   r   fontname_without_subset_tagr?   r?   r@   put_text@  s   


zHTMLConverter.put_textc                 C      |  d d S )Nz<br>r   rl   r?   r?   r@   put_newlineO     zHTMLConverter.put_newliner   c                    sR   dt tB dd ffdddtdd f fdd  |  jj7  _d S )Nr[   r9   c                    s2   t | tr dd|  | D ]}| qd S d S )Nr   r5   )rK   r   r  r   r>   
show_groupr?   r@   r4  S  s   

z0HTMLConverter.receive_layout.<locals>.show_groupc              
      s  t | trP j| j7  _dd|  jr4dj| j j   d| j d| j d | D ]} | q6| j	d urL| j	D ]}| qEd S d S t | t
r^dd|  d S t | trdd| j| j| j| j | D ]} | qsd d S t | tr| d| j| j| j| j d S jd	krt | trd
d|  | D ]} | qd S t | trوdd|  dt| jd | j| jd | D ]} | qd S t | trdd|  d|  | j| j| j d S d S t | tr| D ]} | qjdkr  d S d S t | tr<dd| j| j| j| j|   | D ]} | q-d d S t | trSt| j} |  || j d S t | t!rb"|   d S d S )NrA   r5   z*<div style="position:absolute; top:%dpx;">z	<a name="z">Page z</a></div>
r   r   exactr   r      r   loose)#rK   r   r   rG   r  r   r   r   r   groupsr   r   r&  rD   r  r  r(  r   r  r   r   r   r#  rI   indexr   r   r  r1  get_writing_moder0   r)  r.  r   r   )r[   r   groupr)  r   r>   r4  r?   r@   r   Y  s   








 





	
	
z,HTMLConverter.receive_layout.<locals>.render)r   r   r   r   r   r   r?   r<  r@   rN   R  s   LzHTMLConverter.receive_layoutc                 C      |    d S r:   r  rl   r?   r?   r@   close     zHTMLConverter.close)r   r5   Nr5   r   r   Tr   Nr   NNr9   N)r$  )"r   r   r   r   r   dictrI   r   r   r$   r'   r   r   rp   r   r
   r;   r   r  r  r   r  r   r  r   r  r#  r&  r(  r.  r1  r   rN   r?  r?   r?   r?   r@   r     s   
 
		

5

	


	

Vr   c                   @   s   e Zd ZedZ					ddededed	e	d
e
dB dedB deddfddZdeddfddZdddZdddZdeddfddZdeddfddZdddZdS ) XMLConverterz[ ---]r   r5   NFr6   r   r   r7   r8   r   stripcontrolr9   c                 C   sD   t j| |||||d | j| j krtd|| _|| _|   d S )Nr   r   )r   r;   r   r   r    r   rD  r  )r>   r6   r   r   r7   r8   r   rD  r?   r?   r@   r;     s   

zXMLConverter.__init__r   c                 C   r  r:   r  r   r?   r?   r@   r     r  zXMLConverter.writec                 C   s4   | j r| d| j  d n| d | d d S )Nz<?xml version="1.0" encoding="z" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   rl   r?   r?   r@   r    s   
zXMLConverter.write_headerc                 C   r/  )Nz	</pages>
r0  rl   r?   r?   r@   r    r2  zXMLConverter.write_footerc                 C   s&   | j r
| jd|}| t| d S Nrb   )rD  CONTROLsubr   r/   r   r?   r?   r@   r     s   zXMLConverter.write_textr   c                    s>   dt dd ffdddt dd f fdd  | d S )Nr[   r9   c                    st   t | tr d| j dt| j d d S t | tr8 dt| j d | D ]}| q* d d S d S )N<textbox id="" bbox="r  z<textgroup bbox="r  z</textgroup>
)rK   r   r   r9  r.   rQ   r   r   r3  r?   r@   r4    s   


z/XMLConverter.receive_layout.<locals>.show_groupc                    s4  t | trDd| j dt| j d| j d}| | D ]} | q| jd ur=d | jD ]}| q1d d d S t | tr]d| j	 dt| j d	}| d S t | t
rvd
| j	 dt| j d	}| d S t | trd| j	 dt| j d|   d}| d S t | trd| j dt| j d}| | D ]} | qd d S t | tr܈dt| j d | D ]} | qΈd d S t | trd}t | trd}d| j dt| j d| d}| | D ]} | qd d S t | trHdt| j dt| j d| jj d| jj d| jdd}| |   d d S t | tr[d|   d d S t | trjd urj| }d t| d!| j  d"| j! d	 d S d#| j  d"| j! d	 d S t"t#d$| f)%Nz
<page id="rJ  z
" rotate="r  z	<layout>
z
</layout>
z</page>
z<line linewidth="r  z<rect linewidth="z<curve linewidth="z" pts="z"/>
z<figure name="z
</figure>
z<textline bbox="z</textline>
rb   z wmode="vertical"rI  "z>
z</textbox>
z<text font="z" colourspace="z" ncolour="z" size="z.3fr  z</text>
z<text>z<image src="z	" width="r  z<image width="	Unhandled)$rK   r   r   r.   rQ   rotater   r8  r   r   r   r   get_ptsr   rP   r   r   r   r9  r   r/   r)  r   r   r   r  r   r   r   r   r   r   r  r  AssertionErrorrI   )r[   r  r   r;  wmoderP   r<  r?   r@   r     s   

















 

z+XMLConverter.receive_layout.<locals>.renderr   r   r?   r<  r@   rN     s   YzXMLConverter.receive_layoutc                 C   r=  r:   r>  rl   r?   r?   r@   r?  F  r@  zXMLConverter.close)r   r5   NNFrA  )r   r   r   r   compilerG  r$   r'   rI   r   r   r
   r   r;   r   r  r  r   r   rN   r?  r?   r?   r?   r@   rC    s<    
	


grC  c                   @   s   e Zd ZdZedZ				d#deded	e	d
e
dedB defddZdede	fddZde	ddfddZd$ddZd$ddZde	ddfddZd$ddZdeddfdd Zd$d!d"ZdS )%HOCRConverterzKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]utf8r5   NFr6   r   r   r7   r8   rD  c                 C   s.   t j| |||||d || _d| _|   d S )Nr   F)r   r;   rD  within_charsr  )r>   r6   r   r   r7   r8   rD  r?   r?   r@   r;   ]  s   	zHOCRConverter.__init__rQ   r9   c           
      C   s\   |\}}}}t |}t | jd | }t |}t | jd | }	d| d| d| d|	 S )Nrx   zbbox r  )r   	page_bbox)
r>   rQ   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1r?   r?   r@   	bbox_reprr  s   zHOCRConverter.bbox_reprr   c                 C   s>   | j r|| j }tt| j| d S tt| j| d S r:   )r   r   r   r   r   r   r   )r>   r   encoded_textr?   r?   r@   r   {  s   zHOCRConverter.writec                 C   sp   | j r| d| j  d n| d | d | d | d | d | d | d	 | d
 d S )NzL<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset=''>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
rE  rl   r?   r?   r@   r    s,   


zHOCRConverter.write_headerc                 C   s   |  d |  d d S )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
r0  rl   r?   r?   r@   r    s   
zHOCRConverter.write_footerc                 C   s"   | j r
| jd|}| | d S rF  )rD  rG  rH  r   r   r?   r?   r@   r     s   zHOCRConverter.write_textc                 C   s   t | jdkr?d}d| jv rd}d| jv r|d7 }| d| j d| j d	| d
| | j d| j d| j d| j  d d| _d S )Nr   rb   Italiczfont-style: italic; Boldzfont-weight: bold; z<span style='font:"z"; font-size:z; z' class='ocrx_word' title='z	; x_font z
; x_fsize '>r'  F)	rJ   working_textworking_fontr   working_sizer_  working_bboxstriprU  )r>   bold_and_italic_stylesr?   r?   r@   
write_word  s,   





zHOCRConverter.write_wordr   c                    s$   dt dd f fdd  | d S )Nr[   r9   c                    s  j rt| tr  t| tr6| j_d| j d	| j d | D ]} | q(d d S t| t
rXd	| j d | D ]} | qJd d S t| tr~d| j d	| j d | D ]} | qpd d S t| trj sd	_ |  _| j_| j_| j_d S t|   d
kr  |   d S jd | jd ksɈj| jksɈj| jkrو  | j_| j_| j_ j|  7  _jd
 jd | jd jd f_d S d S )Nz<div class='ocr_page' id='z	' title='ra  r
  z<span class='ocr_line' title='rd  r"  z<div class='ocr_block' id='Tr   r5   rs   rx   )rU  rK   r   rk  r   rQ   rV  r   r   r_  r   r   r9  r   r   re  rh  r)  rf  r  rg  rJ   ri  )r[   r   
child_liner   r?   r@   r     sp   










z,HOCRConverter.receive_layout.<locals>.renderrQ  r   r?   r   r@   rN     s   ;zHOCRConverter.receive_layoutc                 C   r=  r:   r>  rl   r?   r?   r@   r?    r@  zHOCRConverter.close)rT  r5   NFrA  )r   r   r   __doc__r   rR  rG  r$   r'   rI   r   r   r   r;   r+   r_  r   r  r  r   rk  r   rN   r?  r?   r?   r?   r@   rS  J  s8    

	


>rS  )Kr   loggingr   collections.abcr   typingr   r   r   r   r   r   pdfminerr	   pdfminer.imager
   pdfminer.layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   pdfminer.pdfcolorr   pdfminer.pdfdevicer   pdfminer.pdfexceptionsr    pdfminer.pdffontr!   r"   pdfminer.pdfinterpr#   r$   pdfminer.pdfpager%   pdfminer.pdftypesr&   pdfminer.utilsr'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   	getLoggerr   r   r2   r   r   r   r   r   rC  rS  r?   r?   r?   r@   <module>   s:     	T4
 V9  / 