o
    
ziC                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
mZmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZmZmZm Z m!Z! dd	l"m#Z#m$Z$ e%e&Z'G d
d deZ(G dd dZ)G dd de)Z*G dd de)Z+G dd de+Z,G dd de)Z-G dd de-Z.G dd de*Z/G dd de-Z0G dd de*Z1G dd de-Z2G d d! d!Z3G d"d# d#e e Z4dS )$zAdobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on:

  https://github.com/adobe-type-tools/cmap-resources

    N)IterableIteratorMutableMapping)AnyBinaryIOClassVarTextIOUnioncast)name2unicode)PDFExceptionPDFTypeError)PSEOFPSSyntaxError)KWD	PSKeyword	PSLiteralPSStackParserliteral_name)choplistnunpackc                   @      e Zd ZdS )	CMapErrorN__name__
__module____qualname__ r   r   N/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/pdfminer/cmapdb.pyr   '       r   c                   @   s   e Zd ZdZdeddfddZdefddZd	ed
eddfddZ	dede
ddfddZde
deeB e
B ddfddZdddZdedee
 fddZdS )CMapBaser   kwargsreturnNc                 K   s   |  | _d S N)copyattrsselfr!   r   r   r   __init__.      zCMapBase.__init__c                 C   s   | j dddkS )NWModer   )r%   getr'   r   r   r   is_vertical1      zCMapBase.is_verticalkvc                 C   s   || j |< d S r#   )r%   )r'   r/   r0   r   r   r   set_attr4   r)   zCMapBase.set_attrcodecidc                 C      d S r#   r   )r'   r2   r3   r   r   r   add_code2cid7      zCMapBase.add_code2cidc                 C   r4   r#   r   )r'   r3   r2   r   r   r   add_cid2unichr:   r6   zCMapBase.add_cid2unichrcmapc                 C   r4   r#   r   r'   r8   r   r   r   use_cmap=   r6   zCMapBase.use_cmapc                 C   s   t r#   )NotImplementedError)r'   r2   r   r   r   decode@   r6   zCMapBase.decode)r8   r    r"   N)r   r   r   debugobjectr(   boolr-   strr1   intr5   r   bytesr7   r:   r   r<   r   r   r   r   r    +   s    
r    c                	   @   s   e Zd ZdeeB ddfddZdefddZdeddfd	d
Zde	de
e fddZejddfdedeeef dB deedf ddfddZdS )CMapr!   r"   Nc                 K      t j| fi | i | _d S r#   )r    r(   code2cidr&   r   r   r   r(   E      
zCMap.__init__c                 C      d | jdS )Nz
<CMap: {}>CMapNameformatr%   r+   r,   r   r   r   __repr__I   r.   zCMap.__repr__r8   c                    sV   t |tsJ tt|dtttf dtttf dd f fdd  | j|j d S )Ndstsrcr"   c                    s@   |  D ]\}}t|tri }|| |<  || q|| |< qd S r#   )items
isinstancedict)rL   rM   r/   r0   dr$   r   r   r$   O   s   

zCMap.use_cmap.<locals>.copy)rO   rC   r@   typerP   rA   r>   rE   r9   r   rR   r   r:   L   s   *	zCMap.use_cmapr2   c                 c   sj    t d| | | j}t|D ]#}||v r/|| }t|tr%|V  | j}qttttf |}q| j}qd S )Nzdecode: %r, %r)	logr=   rE   iterrO   rA   r
   rP   r>   )r'   r2   rQ   ixr   r   r   r<   Z   s   
zCMap.decoder   outrE   .c                 C   s|   |d u r	| j }d}t| D ],\}}g ||R }t|tr,|d|d| d q| j|tttt	f ||d qd S )Nr   zcode z = cid 
)rX   rE   r2   )
rE   sortedrN   rO   rA   writedumpr
   rP   r>   )r'   rX   rE   r2   r/   r0   cr   r   r   r\   h   s   
 z	CMap.dump)r   r   r   r@   rA   r(   rK   r    r:   rB   r   r<   sysstdoutr   rP   r>   tupler\   r   r   r   r   rC   D   s"    
rC   c                   @   &   e Zd Zdedeedf fddZdS )IdentityCMapr2   r"   .c                 C   s4   t |d }|rtd| d|d |d  S dS )N   >Hr   lenstructunpackr'   r2   nr   r   r   r<   z   s    zIdentityCMap.decodeNr   r   r   rB   r`   rA   r<   r   r   r   r   rb   y       rb   c                   @   ra   )IdentityCMapByter2   r"   .c                 C   s,   t |}|rtd| d|d | S dS )Nrd   Br   rf   rj   r   r   r   r<      s   zIdentityCMapByte.decodeNrl   r   r   r   r   rn      rm   rn   c                   @   sZ   e Zd ZdeeB ddfddZdefddZdedefd	d
Zej	fde
ddfddZdS )
UnicodeMapr!   r"   Nc                 K   rD   r#   )r    r(   
cid2unichrr&   r   r   r   r(      rF   zUnicodeMap.__init__c                 C   rG   )Nz<UnicodeMap: {}>rH   rI   r,   r   r   r   rK      r.   zUnicodeMap.__repr__r3   c                 C   s   t d| | | j| S )Nget_unichr: %r, %r)rT   r=   rq   r'   r3   r   r   r   
get_unichr   s   
zUnicodeMap.get_unichrrX   c                 C   s4   t | j D ]\}}|d| d|d qd S )Nzcid z = unicode rY   )rZ   rq   rN   r[   )r'   rX   r/   r0   r   r   r   r\      s   zUnicodeMap.dump)r   r   r   r@   rA   r(   rK   rt   r^   r_   r   r\   r   r   r   r   rp      s
    rp   c                   @   s   e Zd ZdedefddZdS )IdentityUnicodeMapr3   r"   c                 C   s   t d| | t|S )z+Interpret character id as unicode codepointrr   )rT   r=   chrrs   r   r   r   rt      s   zIdentityUnicodeMap.get_unichrN)r   r   r   rA   r@   rt   r   r   r   r   ru      s    ru   c                   @   s"   e Zd ZdededdfddZdS )FileCMapr2   r3   r"   Nc                 C   s   t |tr
t |tsJ tt|t|f| j}|d d D ]}t|}||v r5ttttf || }qi }|||< |}qt|d }|||< d S )N)	rO   r@   rA   rS   rE   ordr
   rP   r>   )r'   r2   r3   rQ   r]   citr   r   r   r5      s   zFileCMap.add_code2cid)r   r   r   r@   rA   r5   r   r   r   r   rw      s    rw   c                   @   s*   e Zd ZdedeeB eB ddfddZdS )FileUnicodeMapr3   r2   r"   Nc                 C   s   t |tsJ tt|t |tr t |jtsJ t|j}nt |tr,|dd}nt |tr6t	|}nt
||dkrH| j|dkrHd S || j|< d S )NzUTF-16BEignore     )rO   rA   r@   rS   r   namer   rB   r<   rv   r   rq   r+   )r'   r3   r2   unichrr   r   r   r7      s   



zFileUnicodeMap.add_cid2unichr)r   r   r   rA   r   rB   r7   r   r   r   r   r|          "r|   c                       s*   e Zd Zdededdf fddZ  ZS )PyCMapr   moduler"   Nc                    s.   t  j|d |j| _|jrd| jd< d S d S N)rH      r*   )superr(   CODE2CIDrE   IS_VERTICALr%   )r'   r   r   	__class__r   r   r(      s
   zPyCMap.__init__)r   r   r   r@   r   r(   __classcell__r   r   r   r   r      r   r   c                       s.   e Zd Zdedededdf fddZ  ZS )PyUnicodeMapr   r   verticalr"   Nc                    s4   t  j|d |r|j| _d| jd< d S |j| _d S r   )r   r(   CID2UNICHR_Vrq   r%   CID2UNICHR_H)r'   r   r   r   r   r   r   r(      s
   zPyUnicodeMap.__init__)r   r   r   r@   r   r?   r(   r   r   r   r   r   r      s    &r   c                   @   s   e Zd ZU i Zeeeef  ed< i Z	eeee
e f  ed< G dd deZedeeeef ef deeeef ef fddZed	edee fd
dZed	edefddZedd	ededefddZdS )CMapDB_cmap_cache_umap_cachec                   @   r   )zCMapDB.CMapNotFoundNr   r   r   r   r   CMapNotFound   r   r   rQ   r"   c              
   C   sr   t | ts| S i }|  D ])\}}zt|}W n ttfy$   |}Y nw t |tr2t|||< q|||< q|S )zERecursively convert string keys to integers in CODE2CID dictionaries.)rO   rP   rN   rA   
ValueError	TypeErrorr   _convert_code2cid_keys)rQ   resultr/   r0   new_keyr   r   r   r      s   


zCMapDB._convert_code2cid_keysr   c           
   	   C   sV  | dd}td| tjddtjtjt	df}|D ]}| d}tj||}tj
|}tj
|}||tj rtj|rtd| tj|d	d
dG}t|}	d|	v rmdd |	d  D |	d< d|	v r~dd |	d  D |	d< |	dr| |	d |	d< tt|d|	W  d      S 1 sw   Y  q t|)N  zloading: %r	CMAP_PATHz/usr/share/pdfminer/r8   z.json.gzzloading JSON: %rrtzutf-8)encodingr   c                 S      i | ]	\}}t ||qS r   rA   .0r/   r0   r   r   r   
<dictcomp>      z%CMapDB._load_data.<locals>.<dictcomp>r   c                 S   r   r   r   r   r   r   r   r     r   r   r   )replacerT   r=   osenvironr+   pathjoindirname__file__realpath
startswithsepexistsgzipopenjsonloadrN   r   rS   r@   r   r   )
clsr   
cmap_paths	directoryjson_filename	json_pathresolved_json_pathresolved_directorygzfiledatar   r   r   
_load_data   s@   







$
zCMapDB._load_datac                 C   s   |dkr	t ddS |dkrt ddS |dkrtddS |dkr$tddS z| j| W S  ty3   Y nw | |}t|| | j|< }|S )Nz
Identity-Hr   )r*   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rb   rn   r   KeyErrorr   r   )r   r   r   r8   r   r   r   get_cmap  s    




zCMapDB.get_cmapFr   c                    s\   z| j  | W S  ty   Y nw | d   fdddD | j < | j  | S )Nzto-unicode-c                    s   g | ]}t  |qS r   )r   )r   r0   r   r   r   r   
<listcomp>8  s    z*CMapDB.get_unicode_map.<locals>.<listcomp>)FT)r   r   r   )r   r   r   r   r   r   get_unicode_map1  s   zCMapDB.get_unicode_mapN)F)r   r   r   r   r   rP   r@   r   __annotations__r   listr   r   r   staticmethodr	   r>   rA   r   classmethodrS   r   r   r    r   r?   rp   r   r   r   r   r   r      s    
 &r   c                   @   s   e Zd ZdededdfddZd ddZed	Zed
Z	edZ
edZedZedZedZedZedZedZedZedZedZedZedZedZdededdfddZdeddfddZdS )!
CMapParserr8   fpr"   Nc                 C   s$   t | | || _d| _t | _d S )NT)r   r(   r8   _in_cmapset	_warnings)r'   r8   r   r   r   r   r(   =  s   zCMapParser.__init__c                 C   s8   t t |   W d    d S 1 sw   Y  d S r#   )
contextlibsuppressr   
nextobjectr,   r   r   r   runD  s   
"zCMapParser.runs	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 C   s.  || j u rd| _|   dS || ju rd| _dS | jsdS || ju rDz| d\\}}\}}| jt|| W dS  t	yC   Y dS w || j
u rtz| d\\}}| jtt| W dS  t	yi   Y dS  tjys   Y dS w || ju r|   dS || ju r|   dS || ju r|   dS || ju r2dd |  D }td|D ]\}}	}
t|ts| d	 qt|	ts| d
 qt|
ts| d qt|t|	kr| d q|dd }|	dd }||kr| d q|dd }|	dd }t|}t|}t|}t|| d D ]}|td|| | d  }| j|
| | qqdS || ju r>|   dS || ju rmdd |  D }td|D ]\}
}t|trit|
tri| j|
| qRdS || j u ry|   dS || j!u r9dd |  D }td|D ]\}}	}t|ts| d qt|	ts| d qt|t|	kr| d qt|}t|	}t|t"rt||| d kr| d t#t||d |ddD ]\}
}| j|
| qqt|tsJ |dd }t|}|dd }t|}t|| d D ]}|td|| | d  }| j|| | qqdS || j$u rE|   dS || j%u rvdd |  D }td|D ]\}
}t|
trrt|trr| jt|
| qYdS || j&u r|   dS || j'u r|   dS | (||f dS )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrc   r   c                 S      g | ]\}}|qS r   r   r   __objr   r   r   r         z)CMapParser.do_keyword.<locals>.<listcomp>   z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>Lc                 S   r   r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   zThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.)strictc                 S   r   r   r   r   r   r   r   r     r   ))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpopr8   r1   r   r   KEYWORD_USECMAPr:   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rO   rB   
_warn_oncerA   rg   r   rangerh   packr7   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGEr   zipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r'   r   r   _r/   r0   cmapnameobjs
start_byteend_byter3   start_prefix
end_prefixsvarevarstartendvlenrV   rW   r2   unicode_valuevarbaseprefixr   r   r   
do_keywordY  s  















zCMapParser.do_keywordmsgc                 C   s0   || j vr| j | d}td|| dS dS )z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. z%s%sN)r   addrT   warning)r'   r  base_msgr   r   r   r     s   
zCMapParser._warn_once)r"   N)r   r   r   r    r   r(   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rA   r   r  r@   r   r   r   r   r   r   <  s,    
 r   )5__doc__r   r   r   loggingr   os.pathrh   r^   collections.abcr   r   r   typingr   r   r   r   r	   r
   pdfminer.encodingdbr   pdfminer.pdfexceptionsr   r   pdfminer.psexceptionsr   r   pdfminer.psparserr   r   r   r   r   pdfminer.utilsr   r   	getLoggerr   rT   r   r    rC   rb   rn   rp   ru   rw   r|   r   r   r   r   r   r   r   r   <module>   s<     	
5		
`