o
    
ziN                  
   @   sD  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZ d dlmZmZ d dlmZ eeZejZejZejZejZejZG dd dZG dd	 d	eZG d
d deZe
deeZG dd de	e ZeeZeeZej Z!ej Z"e"dZ#e"dZ$e"dZ%e"dZ&e"dZ'e"dZ(dede)fddZ*dedefddZ+e,dZ-e,dZ.e,dZ/e,dZ0e,dZ1e,d Z2e,d!Z3e,d"Z4e,dZ5e,d#Z6e,d$Z7d%d&d'd(d)d*d+d,d-Z8ee9e:eee;f Z<G d.d/ d/Z=e
d0Z>ee)e9e:ee;e?e e@eef e>f ZAeBeCeAe> f ZDG d1d2 d2e=e	e> ZEdS )3    N)Iterator)AnyBinaryIOGenericTypeVarUnion)psexceptionssettings)choplistc                   @   s   e Zd ZdZdS )PSObjectz0Base class for all PS or PDF-related data types.N)__name__
__module____qualname____doc__ r   r   P/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/pdfminer/psparser.pyr      s    r   c                   @   s<   e Zd ZdZeeef ZdeddfddZdefddZ	dS )		PSLiteralaa  A class that represents a PostScript literal.

    Postscript literals are used as identifiers, such as
    variable names, property names and dictionary keys.
    Literals are case sensitive and denoted by a preceding
    slash sign (e.g. "/Name")

    Note: Do not create an instance of PSLiteral directly.
    Always use PSLiteralTable.intern().
    namereturnNc                 C   
   || _ d S Nr   selfr   r   r   r   __init__/      
zPSLiteral.__init__c                 C      | j }d|S N/r   r   r   r   r   __repr__2      
zPSLiteral.__repr__)
r   r   r   r   r   strbytesNameTyper   r   r   r   r   r   r   !   s
    r   c                   @   s0   e Zd ZdZdeddfddZdefddZdS )		PSKeywordaN  A class that represents a PostScript keyword.

    PostScript keywords are a dozen of predefined words.
    Commands and directives in PostScript are expressed by keywords.
    They are also used to denote the content boundaries.

    Note: Do not create an instance of PSKeyword directly.
    Always use PSKeywordTable.intern().
    r   r   Nc                 C   r   r   r   r   r   r   r   r   B   r   zPSKeyword.__init__c                 C   r   r   r   r   r   r   r   r   E   r    zPSKeyword.__repr__)r   r   r   r   r"   r   r!   r   r   r   r   r   r$   7   s    
r$   _SymbolTc                   @   s:   e Zd ZdZdee ddfddZdejdefdd	Z	dS )
PSSymbolTablezA utility class for storing PSLiteral/PSKeyword objects.

    Interned objects can be checked its identity with "is" operator.
    klassr   Nc                 C   s   i | _ || _d S r   dictr'   )r   r'   r   r   r   r   S   r    zPSSymbolTable.__init__r   c                 C   s0   || j v r| j | }|S | |}|| j |< |S r   r(   )r   r   litr   r   r   internW   s   



zPSSymbolTable.intern)
r   r   r   r   typer%   r   r   r#   r+   r   r   r   r   r&   M   s    r&      {   }   [   ]s   <<s   >>xr   c                 C   sd   t | tr$t | jtr| jS zt| jdW S  ty#   t| j Y S w tjr.td| t| S )Nutf-8zLiteral required: )
isinstancer   r   r!   UnicodeDecodeErrorr	   STRICTPSTypeError)r1   r   r   r   literal_namen   s   
r7   c                 C   s8   t | tstjrtd| | }|S t| jdd}|S )NzKeyword required: r2   ignore)r3   r$   r	   r5   r6   r!   r   )r1   r   r   r   r   keyword_name|   s   
r9   s   [\r\n]s   \ss   \Ss   [0-9a-fA-F]s   [#/%\[\]()<>{}\s]s   [^\s0-9a-fA-F]s   [0-9a-fA-F]{2}|.s   [^0-9]s   [()\134]s   [0-7]   	   
         (   )   \   )   b   t   n   f   r   (   )   \c                   @   s  e Zd ZdZdZdeddfddZdefdd	Zd4d
dZ	de
ddfddZdefddZdee
ef fddZdee fddZdede
de
fddZdeddfddZdede
de
fddZdede
de
fddZdede
de
fd d!Zdede
de
fd"d#Zdede
de
fd$d%Zdede
de
fd&d'Zdede
de
fd(d)Zdede
de
fd*d+Zdede
de
fd,d-Zdede
de
fd.d/Zdede
de
fd0d1Z dee
ef fd2d3Z!dS )5PSBaseParserz=Most basic PostScript parser that performs only tokenization.i   fpr   Nc                 C   s   || _ d| _| d d S )NFr   )rK   eofseekr   rK   r   r   r   r      s   zPSBaseParser.__init__c                 C   s    d| j j d| jd| j dS )N<z: z	, bufpos=>)	__class__r   rK   bufposr   r   r   r   r      s    zPSBaseParser.__repr__c                 C      d S r   r   rS   r   r   r   flush      zPSBaseParser.flushposc                 C   sN   t d| | j| || _d| _d| _| j| _d| _	d| _
g | _d| _dS )z'Seeks the parser to the given position.zseek: %r    r   FN)logdebugrK   rM   rR   bufcharpos_parse_main_parse1	_curtoken_curtokenpos_tokensrL   r   rW   r   r   r   rM      s   
zPSBaseParser.seekc                 C   sH   | j t| jk r
dS | j | _| j| j| _| jstdd| _ dS )NFUnexpected EOFr   )	r\   lenr[   rK   tellrR   readBUFSIZPSEOFrS   r   r   r   fillbuf   s   zPSBaseParser.fillbufc                 C   s   d}| j | j }d}	 |   |r,| j| j| jd  }|dkr+||7 }|  jd7  _n:t| j| j}|rU|| j| j|d 7 }|d| _|dd dkrTd	}nn|| j| jd 7 }t| j| _qt	d
|| ||fS )z3Fetches a next line that ends either with \r or \n.rX   F      
r   N   Tznextline: %r, %r)
rR   r\   ri   r[   EOLsearchendrd   rY   rZ   )r   linebuflineposeolcmr   r   r   nextline   s.   zPSBaseParser.nextlinec                 c   s    | j dtj | j  }d}|dkr]|}td|| j }| j | | j || }|s0dS 	 t|d|d}|dkrE|| }n||d | V  |d| }d}q1|dksdS dS )ziFetches a next line backward.

        This is used to locate the trailers at the end of a file.
        r   rX   rj   rm   rk   rl   N)	rK   rM   ioSEEK_ENDre   maxrg   rf   rfind)r   rW   r[   prevpossnr   r   r   revreadlines   s*   
zPSBaseParser.revreadlinesr|   ic                 C   sb  t ||}|st|S |d}|||d  }| j| | _|dkr.d| _| j| _|d S |dkr=d| _| j	| _|d S |dv sE|
 rP|| _| j| _|d S |dkr_|| _| j| _|d S | rn|| _| j| _|d S |dkrd| _d| _| j| _|d S |d	krd| _| j| _|d S |d
krd| _| j| _|d S |dkr|d S | t| |d S )Nr   rj      %   /rX   s   -+   .rG      <   >    )NONSPCro   rd   startrR   r`   r_   _parse_commentr^   _parse_literalisdigit_parse_number_parse_floatisalpha_parse_keywordparen_parse_string_parse_wopen_parse_wclose
_add_tokenKWDr   r|   r   ru   jrt   r   r   r   r]      sV   
zPSBaseParser._parse_mainobjc                 C   s   | j | j|f d S r   )ra   appendr`   )r   r   r   r   r   r   .  s   zPSBaseParser._add_tokenc                 C   sZ   t ||}|s|  j||d  7  _t|S |d}|  j||| 7  _| j| _|S Nr   )rn   ro   r_   rd   r   r]   r^   r   r|   r   ru   r   r   r   r   r   1  s   
zPSBaseParser._parse_commentc                 C   s   t ||}|s|  j||d  7  _t|S |d}|  j||| 7  _|||d  }|dkr>d| _| j| _|d S zt| jd}W n t	yR   | j}Y nw | 
t| | j| _|S )Nr   rj      #rX   r2   )END_LITERALro   r_   rd   r   hex_parse_literal_hexr^   r!   	Exceptionr   LITr]   )r   r|   r   ru   r   rt   r   r   r   r   r   =  s&   

zPSBaseParser._parse_literalc                 C   sl   |||d  }t |rt| jdk r|  j|7  _|d S | jr0|  jtt| jdf7  _| j| _|S )Nrj         )	HEXmatchrd   r   r_   r"   intr   r^   r   r|   r   rt   r   r   r   r   Q  s   zPSBaseParser._parse_literal_hexc                 C   s   t ||}|s|  j||d  7  _t|S |d}|  j||| 7  _|||d  }|dkrB|  j|7  _| j| _|d S tt	 | 
t| j W d    n1 sZw   Y  | j| _|S )Nr   rj   r   )
END_NUMBERro   r_   rd   r   r   r^   
contextlibsuppress
ValueErrorr   r   r]   r   r   r   r   r   [  s    
zPSBaseParser._parse_numberc                 C   s   t ||}|s|  j||d  7  _t|S |d}|  j||| 7  _tt | t	| j W d    n1 s?w   Y  | j
| _|S r   )r   ro   r_   rd   r   r   r   r   r   floatr]   r^   r   r   r   r   r   l  s   
zPSBaseParser._parse_floatc                 C   s   t ||}|r|d}|  j||| 7  _n|  j||d  7  _t|S | jdkr0d}n| jdkr8d}nt| j}| | | j| _|S )Nr   s   trueTs   falseF)	END_KEYWORDro   r   r_   rd   r   r   r]   r^   r   r|   r   ru   r   tokenr   r   r   r   x  s   




zPSBaseParser._parse_keywordc                 C   s   t ||}|s|  j||d  7  _t|S |d}|  j||| 7  _|||d  }|dkr>d| _| j| _|d S |dkrT|  jd7  _|  j|7  _|d S |dkrm|  jd8  _| jrm|  j|7  _|d S | 	| j | j
| _|d S )Nr   rj   rI   rX   rG   rH   )
END_STRINGro   r_   rd   r   oct_parse_string_1r^   r   r   r]   r   r   r   r   r     s.   
zPSBaseParser._parse_stringc                 C   s   |||d  }t |rt| jdk r|  j|7  _|d S | jrHt| jd}|dk s8J d| jd| d|  jt|f7  _| j| _|S |t	v rY|  jtt	| f7  _n|dkrut||d kru||d |d	  d
kru|d7 }| j| _|d S )z;Parse literal strings

        PDF Reference 3.2.3
        rj      r:      zInvalid octal z ()rm   r   rk   )

OCT_STRINGr   rd   r   r   r_   r"   r   r^   
ESC_STRING)r   r|   r   rt   chrcoder   r   r   r     s     0zPSBaseParser._parse_string_1c                 C   sB   |||d  }|dkr|  t | j| _|d7 }|S | j| _|S )Nrj   r   )r   KEYWORD_DICT_BEGINr]   r^   _parse_hexstringr   r   r   r   r     s   
zPSBaseParser._parse_wopenc                 C   s6   |||d  }|dkr|  t |d7 }| j| _|S )Nrj   r   )r   KEYWORD_DICT_ENDr]   r^   r   r   r   r   r     s   
zPSBaseParser._parse_wclosec                 C   s~   t ||}|s|  j||d  7  _t|S |d}|  j||| 7  _tdd td| j}| | | j	| _
|S )Nr   c                 S   s   t t| ddfS )Nr   r   )r"   r   group)ru   r   r   r   <lambda>  s    z/PSBaseParser._parse_hexstring.<locals>.<lambda>rX   )END_HEX_STRINGro   r_   rd   r   HEX_PAIRsubSPCr   r]   r^   r   r   r   r   r     s   

zPSBaseParser._parse_hexstringc                 C   s   | j rtd| js@z|  }|r| jr| dd n	| | j| j| _W n ty<   | dd| _d| _ | js: Y nw | jr
| jd}t	
d| |S )Nrc   rk   r   Tznexttoken: %r)rL   rh   ra   ri   r_   r^   r[   r\   poprY   rZ   )r   changed_streamr   r   r   r   	nexttoken  s(   
zPSBaseParser.nexttokenr   N)"r   r   r   r   rg   r   r   r!   r   rU   r   rM   boolri   tupler"   rv   r   r~   r]   PSBaseParserTokenr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ      s0    
.

rJ   ExtraTc                   @   s   e Zd ZdeddfddZdddZdeddfd	d
Zdee	 ddfddZ
dedeee	  fddZdeee	  fddZdee	 ddfddZdededdfddZdedeeeee	  f fddZdededdfddZdee	 fddZdS ) PSStackParserrK   r   Nc                 C      t | | |   d S r   )rJ   r   resetrN   r   r   r   r        zPSStackParser.__init__c                 C   s   g | _ d | _g | _g | _d S r   )contextcurtypecurstackresultsrS   r   r   r   r     s   
zPSStackParser.resetrW   c                 C   r   r   )rJ   rM   r   rb   r   r   r   rM     r   zPSStackParser.seekobjsc                 G   s   | j | d S r   )r   extendr   r   r   r   r   push  s   zPSStackParser.pushr}   c                 C   s$   | j | d  }g | j | d < |S r   r   )r   r}   r   r   r   r   r     s   zPSStackParser.popc                 C   s   | j }g | _ |S r   r   r   r   r   r   popall!  s   zPSStackParser.popallc                 G   s>   zt d| W n ty   t d Y nw | j| d S )Nzadd_results: %rz!add_results: (unprintable object))rY   rZ   r   r   r   r   r   r   r   add_results&  s   zPSStackParser.add_resultsr,   c                 C   s6   | j || j| jf |g | _| _td|| d S )Nzstart_type: pos=%r, type=%r)r   r   r   r   rY   rZ   )r   rW   r,   r   r   r   
start_type-  s   zPSStackParser.start_typec                 C   s\   | j |krtd| j d|dd | jD }| j \}| _ | _td||| ||fS )NzType mismatch: z != c                 S   s   g | ]\}}|qS r   r   ).0_r   r   r   r   
<listcomp>5  s    z*PSStackParser.end_type.<locals>.<listcomp>z"end_type: pos=%r, type=%r, objs=%r)r   r6   r   r   r   rY   rZ   )r   r,   r   rW   r   r   r   end_type2  s   
zPSStackParser.end_typer   c                 C   rT   r   r   )r   rW   r   r   r   r   
do_keyword:  rV   zPSStackParser.do_keywordc                 C   s  | j s|  \}}t|ttttttfr| 	||f n|t
kr'| |d n|tkrCz
| 	| d W n tyB   tjr@ Y nw |tkrN| |d n|tkrz+| d\}}t|d dkrkd|}t|dd td|D }| 	||f W nZ ty   tjr Y nNw |tkr| |d nB|tkrz
| 	| d W n3 ty   tjr Y n'w t|trtd	||| j | || ntd
||| j | || t| jrq |    | j r| j !d}z	td| W |S  t"y   td Y |S w )zYields a list of objects.

        Arrays and dictionaries are represented as Python lists and
        dictionaries.

        :return: keywords, literals, strings, numbers, arrays and dictionaries.
        adr   r   zInvalid dictionary construct: c                 S   s"   i | ]\}}|d urt ||qS r   )r7   )r   kvr   r   r   
<dictcomp>^  s
    z,PSStackParser.nextobject.<locals>.<dictcomp>pz&do_keyword: pos=%r, token=%r, stack=%rz)unknown token: pos=%r, token=%r, stack=%rznextobject: %rz nextobject: (unprintable object))#r   r   r3   r   r   r   r!   r"   r   r   KEYWORD_ARRAY_BEGINr   KEYWORD_ARRAY_ENDr   r6   r	   r5   r   r   rd   PSSyntaxErrorr
   KEYWORD_PROC_BEGINKEYWORD_PROC_ENDr$   rY   rZ   r   r   errorPSExceptionr   rU   r   r   )r   rW   r   r   	error_msgr   r   r   r   r   
nextobject=  s   

AzPSStackParser.nextobjectr   )r   r   r   r   r   r   r   rM   PSStackEntryr   r   listr   r   r   r!   r   r   PSStackTyper   r$   r   r   r   r   r   r   r   
  s    
"r   )Fr   rw   loggingrecollections.abcr   typingr   r   r   r   r   pdfminerr   r	   pdfminer.utilsr
   	getLoggerr   rY   r   rh   r   r6   PSValueErrorr   r   r$   r%   r&   PSLiteralTablePSKeywordTabler+   r   r   r   r   r   r   r   r   r!   r7   r9   compilern   r   r   r   r   r   r   r   r   r   r   r   r   r   r"   r   rJ   r   r   r)   r   r   r   r   r   r   r   r   r   <module>   sz   











  d