o
     XxiD<                     @   s  d dl Z d dlmZmZ d dlmZ d dlZd dlZd dlm	Z	 e
eZeej e Zeej edZee ee dZdZdZd	Zd
ZdZe Zdd ZG dd deZdd Z G dd deZ!d'ddZ"dd Z#G dd deZ$edkrd%ddfd%ddfd %dd%dfd!%dd"%dfd!%dd#%dfd$%dd%dfd%%dd%dfd&%dd%dfgZ&e"e&\Z'Z(e#e(Z)e$e)gZ*e+e*,d%d e+e*,d%d dS dS )(    N)packunpackOrderedDict)	lru_cachez0%(asctime)s	%(name)s - %(levelname)s	%(message)s                   c                 C   s   t |  t|  d S N)loggersetLevelhandler)level r   I/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/janome/fst.pyset_fst_log_level+   s   
r   c                   @   s   e Zd ZdZg dZdddZdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )Statez
    State Class
    )idfinal	trans_mapfinal_outputNc                 C   s   || _ d| _i | _t | _d S NF)r   r   r   setr   )selfr   r   r   r   __init__6   s   zState.__init__c                 C      | j S r   r   r   r   r   r   is_final<      zState.is_finalc                 C   s
   || _ d S r   r   )r   r   r   r   r   	set_final?      
zState.set_finalc                 C   s   || j v r| j | d S d S )Nstate)r   r   charr   r   r   
transitionB   s   
zState.transitionc                 C   s.   ||| j vr	t n| j | d d| j |< d S )Noutput)r%   r)   r   bytes)r   r'   r%   r   r   r   set_transitionH   s   zState.set_transitionc                 C   r   r   )r   r    r   r   r   state_outputL   r"   zState.state_outputc                 C   s   t dd |D | _d S )Nc                 S   s   g | ]}t |qS r   )r+   .0er   r   r   
<listcomp>P   s    z*State.set_state_output.<locals>.<listcomp>r   r   )r   r)   r   r   r   set_state_outputO   s   zState.set_state_outputc                 C      t  | _d S r   r2   r    r   r   r   clear_state_outputR   s   zState.clear_state_outputc                 C   s   || j v r| j | d S t S Nr)   r*   r&   r   r   r   r)   U   s   
zState.outputc                 C   s$   || j v rt|| j | d< d S d S r6   r*   )r   r'   outr   r   r   
set_output[   s   
zState.set_outputc                 C   s   d| _ i | _t | _d S r   )r   r   r   r   r    r   r   r   clear_   s   zState.clearc                 C   s:   |d u s	t |tsdS | j|jko| j|jko| j|jkS r   )
isinstancer   r   r   r   )r   otherr   r   r   __eq__d   s   

zState.__eq__c                 C   s"   t t| jt| j t| j S r   )hashstrr   r   r   r    r   r   r   __hash__m   s   "zState.__hash__r   )__name__
__module____qualname____doc__	__slots__r   r!   r#   r(   r,   r-   r3   r5   r)   r8   r9   r<   r?   r   r   r   r   r   0   s     
	r   c                 C   s\   t |}| j|_| j D ]\}}||t|d  |||d  qt| j|_|S )Nr%   r)   )r   r   r   itemsr,   copyr8   r   )srcr   r%   ctr   r   r   
copy_stateq   s   rJ   c                   @   sL   e Zd ZdZdZdd Zdd Zdd Zd	d
 Zdd Z	dd Z
dd ZdS )FSTz&
    FST (final dictionary) class
    i c                 C   r4   r   )r   
dictionaryr    r   r   r   r      s   zFST.__init__c                 C   s
   t | jS r   )lenrL   r    r   r   r   size   r$   zFST.sizec                 C   s   | j t|S r   )rL   getr=   r   r%   r   r   r   member      z
FST.memberc                 C   s   || j t|< d S r   rL   r=   rP   r   r   r   insert   s   z
FST.insertc                 C   s   | j t|= d S r   rS   rP   r   r   r   remove   rR   z
FST.removec                 C   s   t | jtjkS r   )rM   rL   rK   MAX_SIZEr    r   r   r   exceed_max_size   rR   zFST.exceed_max_sizec                 C   s   | j  D ];}|j D ]\}}tdt|jt|t|d jt|d g q| r@tdt|jtdt|j	g qd S )N	r%   r)   r   )
rL   valuesr   rE   printjoinr>   r   r!   r   )r   srH   vr   r   r   print_dictionary   s   4$zFST.print_dictionaryN)r@   rA   rB   rC   rV   r   rN   rQ   rT   rU   rW   r^   r   r   r   r   rK   {   s    rK   c                    sv  t | }td|  t  g }t } fdd}dd }t }t }d}	| D ]`\}}||ks2J |||}
|D ]}t| q9t |t |krW|t  t |t |ksIt	t ||
dD ]}||d  
||d  |||  q_t	|
d t |d D ]}||   ||d  
||d  ||  q||kr|t | d	 |t | tt g t	d|
d D ]}g }||d  ||d  }d}|t |k r|t |k r|| || kr|||  |d7 }|t |k r|t |k r|| || ks|t |d  }||d  ||d  | tD ]}|| |d ur4||| | }|| || q||  rXt }||  D ]
}|||  qF|| | |t |d  }q||krr|t |  | n
||
 ||
 | |}|	d7 }	|r|  q't	t |ddD ]}||d  
||d  |||  q||d  td
    |	 fS )Nz(partial) input size: %dc                    s.     | }|d u rt|   } | |S r   )rQ   rJ   rN   rT   )r%   r\   fstDictr   r   find_minimized   s
   

z1create_minimum_transducer.<locals>.find_minimizedc                 S   s`   d}|t | k r.|t |k r.| | || kr.|d7 }|t | k r.|t |k r.| | || ks|S )Nr   r   rM   )s1s2ir   r   r   
prefix_len   s
   ((z-create_minimum_transducer.<locals>.prefix_lenr   r   Tznum of state: )rM   r   inforK   r+   CHARSaddappendr   ranger,   r9   r#   r3   r   r)   r8   r(   r!   r-   debugrN   )inputson_progressinputs_sizebuffer	prev_wordra   rf   current_wordcurrent_output	processedpref_lenrH   re   jcommon_prefixr)   kword_suffix
new_outputtmp_settmp_strr   r_   r   create_minimum_transducer   sx   	
&"((
(r~   c                 C   s  g }i }d}t | j D ]\}}t t|j ddD ]z\}\}}t }	d}
dt }}|dkr5|
t7 }
|d rG|
t	7 }
t
|d }|d }|	td|
7 }	|	td|7 }	|dkrd|	td|7 }	|	|7 }	||d j}|d	usrJ |t
|	 d
 | }|dksJ |	td|7 }	|t|	 |t
|	7 }q| rt }	t}
d}|jrtdd |jD r|
t7 }
t
|j}|js|
t7 }
|	td|
7 }	|r|	td|7 }	|jD ]}t
|}|	td|7 }	|r|	|7 }	q|t|	 |t
|	7 }|||j< qtdt
|  |  d|S )z5
    convert FST to byte array representing arcs
    r   T)reverser)   bBIr%   Nr	   c                 s   s    | ]	}t |d kV  qdS )r   Nrb   r.   r   r   r   	<genexpr>-  s    zcompileFST.<locals>.<genexpr>zcompiled arcs size:     )	enumeraterL   rY   sortedr   rE   	bytearrayr+   FLAG_LAST_ARCFLAG_ARC_HAS_OUTPUTrM   r   rO   r   rk   r!   FLAG_FINAL_ARCr   anyFLAG_ARC_HAS_FINAL_OUTPUTr   rm   r   r[   )fstarcsaddressposnumr\   re   rH   r]   baryflagoutput_sizer)   	next_addrtargetoutput_countr7   r   r   r   
compileFST  sd   "


r   c                   @   s:   e Zd ZdddZdddZdd	 Zed
ddd ZdS )Matcher   r
   c                 C   sJ   |r#|| _ t|| _dd tt|D | _|| _|| _t | _	d S d S )Nc                 S   s   g | ]}t  qS r   r   )r/   _r   r   r   r1   M  s    z$Matcher.__init__.<locals>.<listcomp>)
	dict_datarM   dict_lenrl   cachemax_cache_sizemax_cached_word_len	threadingLocklock)r   r   r   r   r   r   r   r   H  s   
zMatcher.__init__Tc                 C   s4   t  }t| jD ]}|| |||O }qt||fS r   )r   rl   r   _runbool)r   wordcommon_prefix_matchr)   re   r   r   r   runR  s   zMatcher.runc              	   C   s6  t  }d}d }}| j| }t|t|}	}
tt|	| jddD ]L}|d | | j| v rm| j| |d |  \}}}| j" | j| |d | = |t ||f| j| |d | < W d    n1 sdw   Y  |} nq!||
k r| ||}|d t	@ r|s||	kr|d D ]	}|
||  q|d t@ s||	kr	 |S ||d 7 }|| jk r| j+ |t ||f| j| |d | < t| j| | jkr| j| jdd W d    n1 sw   Y  n3||	k r|| |d	 kr||d 7 }|d	7 }||d
 7 }n|d t@ r	 |S ||d 7 }n	 |S ||
k ss|S )Nr   r   r   rg         F)lastr   r	   )r   r   rM   rl   minr   r   r   next_arcr   rj   r   r   popitem)r   r   data_numr   outputsbufre   r   dataword_lendata_lenrw   arcr7   r   r   r   r   X  s\   
"


 
"zMatcher._runi   )maxsizec                 C   sJ  |dksJ |}d}t  }dg}d}|| }|d7 }|t@ ra|t@ r`td|||d  d }	|d7 }g }
t|	D ]$}td|||d  d }|d7 }|r]|
||||   ||7 }q9|
}n:|| }|d7 }|t@ rtd|||d  d }|d7 }||||  }||7 }td|||d  d }|d7 }||||||| fS )Nr   r   r   r   r	   )r+   r   r   r   rl   rk   r   )r   r   addrr   labelr)   r   r   r   final_output_countr   r   r   r   r   r   r     s@   zMatcher.next_arcN)r   r
   )T)r@   rA   rB   r   r   r   r   r   r   r   r   r   r   G  s    


8r   __main__aprutf830aug31decfeb2829janjuljunr   )-rF   structr   r   collectionsr   loggingr   	functoolsr   	getLoggerr@   r   r   WARNStreamHandlerr   	Formatter	formattersetFormatter
addHandlerr   r   FLAG_TARGET_NEXTFLAG_STOP_NODEr   r   r   ri   r   objectr   rJ   rK   r~   r   r   encodeinputs1ru   r   r   mrZ   r   r   r   r   r   <module>   sV   



A

#i@s

