o
    
zi                     @   s   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ edZe eZd	ed
efddZded
dfddZG dd dZdS )    N)Iterable)ClassVarcast)glyphname2unicode)ENCODINGPDFKeyError)	PSLiteralz[0-9a-fA-F]+namereturnc                    s@  t | tstd|  dt|  | dd } | d}t|dkr+dtt|S | t	v r3t	|  S | 
drm| d t rlt d	 dkrl fd
dtdt d	D }|D ]}t| q[tt|}d|S n+| 
dr| d}t|rd	t|  krdkrn nt|dd}t| t|S td|  d)a  Converts Adobe glyph names to Unicode numbers.

    In contrast to the specification, this raises a KeyError instead of return
    an empty string when the key is unknown.
    This way the caller must explicitly define what to do
    when there is not a match.

    Reference:
    https://github.com/adobe-type-tools/agl-specification#2-the-mapping

    :returns unicode character if name resembles something,
    otherwise a KeyError
    z Could not convert unicode name "z?" to character because it should be of type str but is of type .r   _    uni   c                    s$   g | ]}t  ||d   ddqS )r      base)int).0iname_without_uni R/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/pdfminer/encodingdb.py
<listcomp>1   s    z name2unicode.<locals>.<listcomp>u   r   r   z6" to character because it does not match specification)
isinstancestrr   typesplitlenjoinmapname2unicoder   
startswithstripHEXADECIMALmatchrange#raise_key_error_for_invalid_unicodechrr   )r
   
componentsunicode_digitsdigit
charactersname_without_uunicode_digitr   r   r   r&      s@   









&
r&   r3   c                 C   s.   d|   k r
dk rn dS t d|  ddS )zUnicode values should not be in the range D800 through DFFF because
    that is used for surrogate pairs in UTF-16

    :raises KeyError if unicode digit is invalid
    i  i   zUnicode digit z8 is invalid because it is in the range D800 through DFFFNr   )r3   r   r   r   r,   H   s
   
r,   c                
   @   s  e Zd ZU i Zeeeef  ed< i Z	eeeef  ed< i Z
eeeef  ed< i Zeeeef  ed< eD ]#\ZZZZZeeZerHeee< erNee	e< erTee
e< erZeee< q7ee	e
edZeeeeeef f  ed< e	dded	ee dB d
eeef fddZdS )
EncodingDBstd2unicodemac2unicodewin2unicodepdf2unicode)StandardEncodingMacRomanEncodingWinAnsiEncodingPDFDocEncoding	encodingsNr
   diffr   c                 C   s   | j || j}|rN| }d}|D ];}t|tr|}qt|trMzttt	|j
||< W n ttfyH } ztt	| W Y d }~nd }~ww |d7 }q|S )Nr   r   )r=   getr5   copyr   r   r	   r&   r   r    r
   KeyError
ValueErrorlogdebug)clsr
   r>   cid2unicodecidxer   r   r   get_encodingl   s"   

zEncodingDB.get_encoding)N)__name__
__module____qualname__r5   r   dictr   r    __annotations__r6   r7   r8   r   r
   stdmacwinpdfr&   cr=   classmethodr   objectrJ   r   r   r   r   r4   U   s<   
 "

r4   )loggingrecollections.abcr   typingr   r   pdfminer.glyphlistr   pdfminer.latin_encr   pdfminer.pdfexceptionsr   pdfminer.psparserr	   compiler)   	getLoggerrK   rC   r    r&   r   r,   r4   r   r   r   r   <module>   s    

8