o
    kxi6                  
   @   s  d dl Z d dlmZ d dlZd dlmZ d dlmZmZ d dl	m
Z
 d dlmZ dai ae add Zd	d
 ZdefddZdefddZd2dedefddZejejejedZe jedZej dZ!e!rzej"e!dZ#e$d W n% e%y Z& ze$de&  e' Z#W Y dZ&[&ndZ&[&ww e$d e' Z#ej(de#da)ej(de#da*defd d!Z+d"efd#d$Z,defd%d&Z-d3dedefd(d)Z.defd*d+Z/d,efd-d.Z0d4dedefd0d1Z1dS )5    N)embedding_functions)Session)DictionaryItemBibleQA)	BM25Okapi)	Tokenizerc                 C   s<   g }t | D ]}|jdd }|dv r||j q|S )zExtract nouns for BM25.,r   u   名詞)	tokenizertokenizepart_of_speechsplitappendsurface)texttokenstokenpos r   //home/air/biblejyuku/back/dictionary_service.pytokenize_japanese   s   r   c                 C   st   g }g }t | D ],}|jd}|d }t|dkr|d nd}|dv r5||j |dkr5||j q	||fS )uO   
    Extract tokens and specifically identify proper nouns (固有名詞).
    r   r       r	   u   固有名詞)r
   r   r   r   lenr   r   )r   r   proper_nounsr   partsr   sub_posr   r   r   extract_query_details   s   r   dbc           
      C   s   |  t }ztd W n   Y tjdtdai ag }g }g }g }|rut	|D ]4\}}|j
}|| ||jt|jd |t|j t|}	||	 |j|j
|jdt|< q*|ritj|||d |rqt|adS dadS dS )zE
    Wipe and rebuild BibleQA ChromaDB and in-memory BM25 index.
    bible_qanameembedding_functionanswerid)r%   questionr$   	documents	metadatasidsN)queryr   allchroma_clientdelete_collectionget_or_create_collectionembedding_fnqa_collectionbm25_doc_map	enumerater&   r   r$   strr%   r   addr   
bm25_index)
r   itemstokenized_corpusdocsmetasr*   iitemdoc_textr   r   r   r   rebuild_qa_index-   s4   

r>   c                 C   s   i a g }| t }|rFtdt| d t|D ]\}}t|j}|	| |j
|j|j|dt |< q|rBt|atd dS dadS td dS )zc
    Initialize BM25 index from DB without rebuilding Chroma.
    Call this on server startup.
    zInitializing BM25 for z	 items...)r%   r&   r$   r   zBM25 Index built successfully.NzNo items for BM25.)r2   r+   r   r,   printr   r3   r   r&   r   r%   r$   r   r6   )r   r8   r7   r;   r<   r   r   r   r   	init_bm25[   s   

r@      r+   kc              	      s  d}t j| g|d}i }|d r%t|d d D ]\}|d |t< qi }g }g }trit| \}}t| ttt	  fdddd	d
| }	t|	D ]\}}
 |
 dkrht
|
}|rh|d ||d < qOi t| t| B }trdd t
 D ni }|D ]k|v rdd|   nd}|v rdd|   nd}d}|rt|}t|g }||@ }t	|dkrt	|t	| }|dk r|d9 }|d9 }|rt|g }t||s|d9 }|d9 }|| < qt fdddd	d
| }g }|D ]Xd}d}tfddt
 D d
}|r&|d }|d }d}|rM|v rMt|t| @ }t	t|dkrMt	|t	t| }|t |||dd q|S )z
    Hybrid Search: Vector (Chroma) + Keyword (BM25) with RRF Fusion.
    Includes Keyword Coverage Check (Majority Rule).
    
   query_texts	n_resultsr*   r   r   c                        |  S Nr   )r;   )
doc_scoresr   r   <lambda>       z(search_bible_qa_hybrid.<locals>.<lambda>T)keyreverseNr%   c                 S   s"   i | ]\}}|d  | dg qS )r%   r   )get.0rB   vr   r   r   
<dictcomp>   s   " z*search_bible_qa_hybrid.<locals>.<dictcomp><   g        g      ?g{Gz?gMbP?c                    rG   rH   r   )x)fused_scoresr   r   rJ      rK   r   c                 3   s$    | ]\}}|d   kr|V  qdS )r%   Nr   rO   item_idr   r   	<genexpr>   s   " z)search_bible_qa_hybrid.<locals>.<genexpr>r&   r$   )r&   r$   )r%   scorecoveragemetadata)r1   r+   r3   intr6   r   
get_scoressortedranger   r2   rN   setkeysr7   issubsetnextr   r4   )r+   rB   k_retrievalvector_resultsvector_ranksrank
bm25_ranksquery_tokensr   top_n_indicesidx	item_dataall_idsid_to_tokens_mapv_scoreb_scorerZ   start_tokens
doc_tokenscommon
sorted_idsfinal_resultsr&   r$   final_coverager   )rI   rU   rW   r   search_bible_qa_hybridv   s|   
&
"rw   	chroma_db)pathGEMINI_API_KEY)api_keyzUsing Gemini Embeddingsz"Failed to init Gemini Embeddings: z3GEMINI_API_KEY not found, using default embeddings.bible_dictionaryr    r   r<   c                 C   sF   | j  d| j d| j d}tj|g| j | jdgt| j gd dS )z+
    Upsert a single item to ChromaDB.
    :  ()termversesr'   N)r   
definitionr   
collectionupsertr4   r<   r=   r   r   r   	sync_item
  s   

r   r   c                 C   s"   z
t j| gd W dS    Y dS )z'
    Delete an item from ChromaDB.
    r*   N)r   delete)r   r   r   r   delete_item  s   r   c                 C   s   |  t }ztd W n   Y tjdda|sdS g }g }g }|D ])}|j d|j d|j	 d}|
| |
|j|j	pBdd |
t|j q%|r[tj|||d	 dS dS )
za
    Wipe and rebuild ChromaDB from MySQL.
    Useful for 'Reset' or 'Bulk Import' features.
    r|   )r!   Nr}   r~   r   r   r   r'   )r+   r   r,   r-   r.   r/   r   r   r   r   r   r4   r5   )r   r7   r9   r:   r*   r<   r=   r   r   r   rebuild_index   s&   
r      c                 C   s(   t j| g|d}|d sg S |d d S )z+
    Search relevant dictionary items.
    rD   r(   r   )r   r+   )r+   rB   resultsr   r   r   search_dictionaryE  s   r   c                 C   s4   | j }tj|g| jt| jdgt| jgd dS )z3
    Upsert a single BibleQA item to ChromaDB.
    r#   r'   N)r&   r1   r   r$   r4   r%   r   r   r   r   sync_qa_itemW  s   

r   rW   c                 C   s&   zt jt| gd W d S    Y d S )Nr   )r1   r   r4   rV   r   r   r   delete_qa_iteme  s   r   r   c                 C   st   t j| g|d}|d sg S g }tt|d d D ]}||d d | |d d | |d d | d q|S )zo
    Search relevant BibleQA items.
    Returns list of matching BibleQA items (dict or object structure).
    rD   r*   r   	distancesr)   )r%   distancer[   )r1   r+   r_   r   r   )r+   rB   r   matchesr;   r   r   r   search_bible_qam  s   	
r   )rA   )r   )r   )2chromadbchromadb.utilsr   ossqlalchemy.ormr   modelsr   r   	rank_bm25r   janome.tokenizerr   r6   r2   r
   r   r   r>   r@   r4   r\   rw   ry   joindirnameabspath__file__CHROMA_DB_PATHPersistentClientr-   environrN   rz   #GoogleGenerativeAiEmbeddingFunctionr0   r?   	ExceptioneDefaultEmbeddingFunctionr/   r   r1   r   r   r   r   r   r   r   r   r   r   r   <module>   sN   	. 	%