o
     Xxi"C                     @   s  d dl mZmZ d dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
Z
d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ ddlmZmZmZ eeZeej e Zeej edZe e e!e dZ"d	Z#d
Z$dZ%dZ&dZ'dZ(dZ)dZ*dAddZ+dd Z,dd Z-dd Z.dd Z/dBddZ0dBddZ1dBd d!Z2d"d# Z3d$d% Z4d&d' Z5dCd)d*Z6d+d, Z7d-d. Z8d/d0 Z9d1d2 Z:G d3d4 d4eZ;G d5d6 d6e;Z<G d7d8 d8e;Z=G d9d: d:e>Z?G d;d< d<e<Z@G d=d> d>e<ZAG d?d@ d@eBZCdS )D    )ABCabstractmethodN)packunpack)	lru_cache   )Matchercreate_minimum_transducer
compileFSTz0%(asctime)s	%(name)s - %(levelname)s	%(message)szfst_data%d.pyzentries_extra%d.pyzentries_compact%d.pyzentries_buckets.pyzconnections%d.pyz
chardef.pyzunknowns.pyzuser_fst.datazuser_entries.datac                 C   s    t tj|t| | dd d S )NT)binary)_save_as_moduleospathjoinMODULE_FST_DATA)datadirpart r   I/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/janome/dic.pysave_fstdata3   s    r   c                 C   s4   t tj| t| | t tj| t| | d S N)_start_entries_as_moduler   r   r   MODULE_ENTRIES_COMPACTMODULE_ENTRIES_EXTRA)r   
bucket_idxmorph_offsetr   r   r   start_save_entries7   s   r   c                 C   s0   t tj| t|  t tj| t|  d S r   )_end_entries_as_moduler   r   r   r   r   )r   r   r   r   r   end_save_entries<   s   r   c                 C   s8   t tj| t| || ttj| t| || d S r   )_save_entry_as_module_compactr   r   r   r   _save_entry_as_module_extrar   )r   r   morph_identryr   r   r   
save_entryA   s   r$   c                 C   s   t tj| t| d S r   )r   r   r   r   MODULE_ENTRIES_BUCKETS)r   bucketsr   r   r   save_entry_bucketsF      r'   .c                 C   sT   t | d d }d}tddD ]}ttj|t| | |||   ||7 }qd S )N   r   r      )lenranger   r   r   r   MODULE_CONNECTIONS)connectionsr   bucket_sizeoffsetir   r   r   save_connectionsJ   s   
r3   c                 C      t tj|t|  d S r   )r   r   r   r   MODULE_CHARDEFS)chardefsr   r   r   r   save_chardefsU   r(   r7   c                 C   r4   r   )r   r   r   r   MODULE_UNKNOWNS)unknownsr   r   r   r   save_unknownsY   r(   r:   c                 C   sN   |sd S t | d|}|| |  W d    d S 1 s w   Y  d S )Nwb)gzipopenwriteflush)filer   compresslevelfr   r   r   _save]   s   

"rC   c                 C   sL   t j| sd S t| d}| }|W  d    S 1 sw   Y  d S )Nrb)r   r   existsr<   r=   read)r@   rB   r   r   r   r   _loade   s   $rG   c                 C   s8   zt | |}W n
 ty   Y d S w t|tjdB S )N   )pkgutilget_dataIOErrorzlib
decompress	MAX_WBITS)packageresourcerawdatar   r   r   _load_package_datam   s   rR   Fc                 C   s   |sd S t | d5}|d |r'|d |t|d |d n|t|dd |  W d    d S 1 sAw   Y  d S )NwzDATA="asciiz\\\)r=   r>   base64	b64encodedecodestrreplacer?   )r@   r   r   rB   r   r   r   r   u   s   


"r   c              	   C   s   t dd| }t| d9}t|d}|d |d |d| d W d    n1 s0w   Y  W d    d S W d    d S 1 sHw   Y  d S )N\.py$_idx.pyrS   zDATA={z
"offset": z, "positions": [)resubr=   r>   )r@   morph_id_offsetidx_filerB   f_idxr   r   r   r      s   

"r   c              	   C   s   t dd| }t| d8}t|d}|d |d |  |  W d    n1 s/w   Y  W d    d S W d    d S 1 sGw   Y  d S )Nr\   r]   az}
z]}
)r^   r_   r=   r>   r?   )r@   ra   rB   rb   r   r   r   r      s   


"r   c              	   C   s   t dd| }t| dY}t|d;}|d|  | }|| d d|d dd	|d
 |d |d f }|| |d W d    n1 sPw   Y  W d    d S W d    d S 1 shw   Y  d S )Nr\   r]   rc   %d:(,zu'%s',%4d,%4d,%5dr   unicode_escaperU   r   r*   r+   ),r^   r_   r=   r>   tellencoderY   r@   r"   r#   ra   rB   rb   possr   r   r   r       s$   
"r    c                 C   s&  t dd| }t| d}}t|d_}|d|  | }|| d d|d dd	|d
 dd	|d dd	|d dd	|d dd	|d dd	f }|| |d W d    n1 stw   Y  W d    d S W d    d S 1 sw   Y  d S )Nr\   r]   rc   rd   re   z#u'%s',u'%s',u'%s',u'%s',u'%s',u'%s'   rf   rU               	   rg   rh   rk   r   r   r   r!      s(   
"r!   c                   @   s4   e Zd ZdZedd Zedd Zedd ZdS )	
Dictionaryz
    Base dictionary class
    c                 C      d S r   r   )selfrm   matcherr   r   r   lookup      zDictionary.lookupc                 C   ru   r   r   rv   numr   r   r   lookup_extra   ry   zDictionary.lookup_extrac                 C   ru   r   r   rv   id1id2r   r   r   get_trans_cost   ry   zDictionary.get_trans_costN)__name__
__module____qualname____doc__r   rx   r|   r   r   r   r   r   rt      s    

rt   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )RAMDictionaryz
    RAM dictionary class
    c                 C   s   || _ || _d S r   )entriesr/   )rv   r   r/   r   r   r   __init__   s   
zRAMDictionary.__init__c                 C   s   | |\}}|sg S z g }|D ]}td|d }||f| j| d d   q|W S  tyU   td td|  tdt|  t	  t
d Y d S w )NIr   rn   =Cannot load dictionary data. The dictionary may be corrupted?input=outputs=r   )runr   appendr   	ExceptionloggererrorrZ   	traceback
format_excsysexit)rv   rm   rw   matchedoutputsreser{   r   r   r   rx      s     
zRAMDictionary.lookupc                 C   sF   z
| j | dd  W S  ty"   td t  td Y d S w )Nrn   r   r   )r   r   r   r   r   r   r   r   rz   r   r   r   r|      s   
zRAMDictionary.lookup_extrac                 C      | j | | S r   r/   r}   r   r   r   r         zRAMDictionary.get_trans_costN)r   r   r   r   r   rx   r|   r   r   r   r   r   r      s    r   c                   @   sT   e Zd ZdZdd Zdd Zedddd	 Zed
ddd Zdd Z	dd Z
dS )MMapDictionaryz
    MMap dictionary class
    c                 C   s&   || _ | | _|| _|| _|| _d S r   )entries_compactkeysbucket_rangesentries_extra
open_filesr/   )rv   r   r   r   r/   r   r   r   r      s
   

zMMapDictionary.__init__c                 C   s   | |\}}|sg S zg }|D ]}td|d }||f| |  q|W S  tyQ   td td|  tdt|  t	  t
d Y d S w )Nr   r   r   r   r   r   )r   r   r   _find_entryr   r   r   rZ   r   r   r   r   )rv   rm   rw   r   r   matched_entriesr   idxr   r   r   rx      s    
zMMapDictionary.lookupi    maxsizec                    s   t t fdd| j}| j| \}} |d  }|d | d }|d|}|d }|d }	|	d }
|
d }|d }|d	 }||| d
t|||	 t||
| t||| f}|S )Nc                        | d ko | d k S Nr   r   r   br   r   r   <lambda>      z,MMapDictionary._find_entry.<locals>.<lambda>r1   	positionsr*   s   ',rn   r   ro   rf   )nextfilterr   r   findrY   int)rv   r   bucketmmmm_idxrel_idx_pos1s_pos1e_pos2s_pos2e_pos3s_pos3e_pos4s_pos4e_entryr   r   r   r     s"   zMMapDictionary._find_entry   c                    sZ  zt t fdd| j}| j| \}} |d  }|d | d }|d|}|d }|d|}	|	d }
|d|
}|d }|d|}|d }|d|}|d }|d|}||| d	|||	 d	||
| d	||| d	||| d	||| d	fW S  ty   td
 td   t	
  td Y d S w )Nc                    r   r   r   r   r   r   r   r   $  r   z-MMapDictionary.lookup_extra.<locals>.<lambda>r1   r   r*   s   ',u'rn   s   ')rf   z8Cannot load extra info. The dictionary may be corrupted?zidx=r   )r   r   r   r   r   rY   r   r   r   r   r   r   r   )rv   r   r   r   r   r   r   r   r   r   r   r   r   r   _pos5s_pos5e_pos6s_pos6er   r   r   r|   !  s>   
zMMapDictionary.lookup_extrac                 C   r   r   r   r}   r   r   r   r   ?  r   zMMapDictionary.get_trans_costc                 C   sV   | j  D ]\}}|  q| jr| j D ]\}}|  q| jD ]}|  q"d S r   )r   valuescloser   r   )rv   r   r   fpr   r   r   __del__B  s   



zMMapDictionary.__del__N)r   r   r   r   r   rx   r   r   r|   r   r   r   r   r   r   r      s    

r   c                   @   sB   e Zd ZdZdd Zedddd Zdd	 Zd
d Zdd Z	dS )UnknownsDictionaryz5
    Dictionary class for handling unknown words
    c                 C   s   |d | _ |d | _|| _d S r   )char_categorieschar_rangesr9   )rv   r6   r9   r   r   r   r   Q  s   


zUnknownsDictionary.__init__r   r   c                 C   sd   i }| j D ]$}|d |  kr|d kr)n q|d }d|v r#|d ng }|||< q|s0dg i}|S )Nfromtocatecompat_catesDEFAULT)r   )rv   cr   	chr_ranger   compate_catesr   r   r   get_char_categoriesV  s   
 z&UnknownsDictionary.get_char_categoriesc                 C      || j v r| j | d S dS )NINVOKEFr   rv   r   r   r   r   unknown_invoked_alwaysb     
z)UnknownsDictionary.unknown_invoked_alwaysc                 C   r   )NGROUPFr   r   r   r   r   unknown_groupingg  r   z#UnknownsDictionary.unknown_groupingc                 C   r   )NLENGTHr   r   r   r   r   unknown_lengthl  r   z!UnknownsDictionary.unknown_lengthN)
r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   L  s    
r   c                       s\   e Zd ZdZd fdd	Z fddZedd Zed	d
 Zedd Z	dddZ
  ZS )UserDictionaryz,
    User dictionary class (on-the-fly)
    Nc                    s:   t ||||\}}t || |g| _t|g| _dS )a  
        Initialize user defined dictionary object.

        :param user_dict: user dictionary file (CSV format)
        :param enc: character encoding
        :param type: user dictionary type. supported types are 'ipadic' and 'simpledic'
        :param connections: connection cost matrix. expected value is SYS_DIC.connections
        :param progress_handler: handler mainly to indicate progress, implementation of ProgressHandler

        .. seealso:: http://mocobeta.github.io/janome/en/#use-with-user-defined-dictionary
        N)r   	build_dicsuperr   compiledFSTr   rw   )rv   	user_dictenctyper/   progress_handlerfst_datar   	__class__r   r   r   w  s   zUserDictionary.__init__c                       t  || jS r   r   rx   rw   rv   rm   r   r   r   rx        zUserDictionary.lookupc                 C   sZ   | d\}}}}}}}}	}
}}}}d||||	g}|t|t|t|||
||||f
S )z;Convert IPADIC formatted string to an user dictionary entryre   )splitr   r   )clslinesurfaceleft_idright_idcost	pos_major
pos_minor1
pos_minor2
pos_minor3	infl_type	infl_form	base_formreadingphoneticpart_of_speechr   r   r   line_to_entry_ipadic  s   

z#UserDictionary.line_to_entry_ipadicc              
   C   s:   | d\}}}d|dddg}|ddd|dd|||f
S )z?Convert simpledict formatted string to an user dictionary entryre   *r   i`y)r   r   )r   r   r   r   r   r   r   r   r   line_to_entry_simpledic  s   z&UserDictionary.line_to_entry_simpledicc                 C   sX  g }i }t | d| }|r1t||d}|jtdd |D dd W d    n1 s,w   Y  tj||d2}|D ]'}	|	 }	||	}
t|}||
d dt	d	|f |
||< |rb|
  q;W d    n1 smw   Y  |rx|  t|}t|t|ksJ |r|jt|d
d t||r|j
nd d\}}|r|  t|}||fS )Nline_to_entry_)encodingc                 s   s    | ]}d V  qdS )r   Nr   ).0r   r   r   r   	<genexpr>  s    z+UserDictionary.build_dic.<locals>.<genexpr>z Reading user dictionary from CSV)totaldescr   utf8r   z!Running create_minimum_transducer)on_progress)getattrr=   on_startsumiorstripr,   r   rj   r   r
  on_completesortedr	   r
   )r   r   r   	dict_typer   surfacesr   line_to_entryrB   r   r#   r"   inputs	processedfstr   r   r   r   r     sN   
zUserDictionary.build_dicrs   c                 C   s   t j|rt j|std| t j|s#t j|tddd tt j|t	| j
d | tt j|tt| j| dS )z
        Save compressed compiled dictionary data.

        :param to_dir: directory to save dictionary data
        :compressionlevel: (Optional) gzip compression level. default is 9
        zNot a directory : 0755rr   )moder   N)r   r   rE   isdirr   makedirsr   rC   r   FILE_USER_FST_DATAr   FILE_USER_ENTRIES_DATApickledumpsr   )rv   to_dircompressionlevelr   r   r   save  s   "zUserDictionary.saver   )rs   )r   r   r   r   r   rx   classmethodr   r  r   r"  __classcell__r   r   r   r   r   r  s    
	

2r   c                       s8   e Zd ZdZ fddZ fddZedd Z  ZS )CompiledUserDictionaryz*
    User dictionary class (compiled)
    c                    s,   t |\}}t || t|g| _d S r   )r%  	load_dictr   r   r   rw   )rv   dic_dirr/   r   r   r   r   r   r     s   zCompiledUserDictionary.__init__c                    r   r   r   r   r   r   r   rx     r   zCompiledUserDictionary.lookupc                 C   sX   t j|rt j|std| tt j|t}t	tt j|t
}||fS )NzNo such directory : )r   r   rE   r  r   rG   r   r  r  loadsr  )r   r'  r   r   r   r   r   r&    s
   z CompiledUserDictionary.load_dict)	r   r   r   r   r   rx   r#  r&  r$  r   r   r   r   r%    s    r%  c                   @   s   e Zd Zdd ZdS )LoadingDictionaryErrorc                 C   s
   d| _ d S )NzECannot load dictionary data. Try mmap mode for very large dictionary.)message)rv   r   r   r   r     s   
zLoadingDictionaryError.__init__N)r   r   r   r   r   r   r   r   r)    s    r)  )r   )r)   )F)Dabcr   r   r   r  r  r<   structr   r   r   loggingr   r^   rI   rL   rW   	functoolsr   r  r   r	   r
   	getLoggerr   r   setLevelWARNStreamHandlerhandler	Formatter	formattersetFormatter
addHandlerr   r   r   r%   r.   r5   r8   r  r  r   r   r   r$   r'   r3   r7   r:   rC   rG   rR   r   r   r   r    r!   rt   r   r   objectr   r   r%  r   r)  r   r   r   r   <module>   sl   








	
&\&l