o
    3IhHY                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlZd dlmZ d dlmZmZ d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(Z)d dl*mZ+ d dl,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7 d dl8m9Z9 e"dZ:de e:e!e: f de!e: fddZ;dedefddZ<dee-j= dee-j= ddfdd Z>d!e0deddfd"d#Z?d!e0d$ed%e e'd& e'd' e'd( f ddfd)d*Z@d!e0deddfd+d,ZAd!e0deddfd-d.ZBd!e0deddfd/d0ZCd!e0deddfd1d2ZDd!e0ddfd3d4ZEe9jFfd5e-jGd6e-jGd7ee+jHe+jHgeIf dee!e!eJ  e!e!eI  f fd8d9ZKd:eJddfd;d<ZLd!e0fd=d>ZM	?	@			d[d!e0dedAeJdBeIdCee-jN dDee!eJ  dEee-jG ddfdFdGZOdHe-jPdAeJddfdIdJZQdKedeJfdLdMZRdNedOe!e0 dPeSddfdQdRZTdNedOe!e0 de%eeJf fdSdTZUdNedOe!e0 dPeSddfdUdVZVedWe.dXeWfdYdZZXdS )\    N)UUID)contextmanager)
SegmentAPI)SysDB)create_topic_name)System)get_sql)SqliteDB)sleep)SegmentType)NormalizedRecordSet	RecordSet)	CallableOptionalTupleUnionListTypeVarcastAnyDict)Literal)types	ClientAPI)
Collection)note)InvalidArgument)Table	functions)distance_functionsTvaluereturnc                 C   s$   | du rt dt| tr| S | gS )z*Wrap a value in a list if it is not a listNzvalue cannot be None)r   
isinstancer   )r!    r$   a/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/chromadb/test/property/invariants.pywrap"   s
   
r&   
record_setc                 C   s(  | d du r	d}net | d trj| d dusJ t| d dkratdd | d D r4ttj| d }n:tdd | d D rHttj| d }n&tdd | d D r]ttj| d g}ntdttj| d }ntd	t| d
 | d durt| d nd| d durt| d |dS d|dS )z9Ensure that an embedding set has lists for all its values
embeddingsNr   c                 s   s    | ]}t |tV  qd S N)r#   list.0	embeddingr$   r$   r%   	<genexpr>5   s    

zwrap_all.<locals>.<genexpr>c                 s   s    | ]	}t |tjV  qd S r)   )r#   npndarrayr+   r$   r$   r%   r.   9   s
    

c                 s   s&    | ]}t |tttjtjfV  qd S r)   )r#   intfloatr/   integerfloatingr,   er$   r$   r%   r.   ?   s
    
z-an embedding must be a list of floats or intszVembeddings must be a list of lists, a list of numpy arrays, a list of numbers, or Noneids	documents	metadatas)r7   r8   r9   r(   )	r#   r*   lenallr   r   
Embeddingsr   r&   )r'   embedding_listr$   r$   r%   wrap_all,   sF   

r>   expectedgotc                 C   s   | d u r|d u s| d ur|d usJ | d ur`|d urbt | t |ks$J |  D ];\}}||v s2J t| | trUt|| trUttt| | tt||  dk sTJ q(| | || ks_J q(d S d S d S )Nư>)r:   itemsr#   r2   absr   )r?   r@   keyvalr$   r$   r%   check_metadata[   s   *rF   
collectionc                 C   sN   |   }t|}|t|d krtd|dt|d  |t|d ks%J dS )z?The given collection count is equal to the number of embeddingsr7   zcount mismatch:z=!N)countr>   r:   print)rG   r'   rH   normalized_record_setr$   r$   r%   rH   k   s
   rH   rJ   
field_namer8   r9   r(   c                    sT  | j |d |gddd t|d D  | }t|d dkr<|dkr4ttjt |jdks2J dS |g ks:J dS |dusBJ tt| fdd	d
}dd |D }|| }|du rgdgt|d  }|dkr{t	
t	|t	|syJ dS t|t|ksJ t||D ]\}}t|trtttj|ttj| q||ksJ qdS )zq
    The actual embedding field is equal to the expected field
    field_name: one of [documents, metadatas]
    r7   r7   includec                 S      i | ]\}}||qS r$   r$   r,   iidr$   r$   r%   
<dictcomp>       z"_field_matches.<locals>.<dictcomp>r   r(   Nc                    s    d | d   S )Nr7   r   r$   )index_and_field_valueembedding_id_to_indexresultr$   r%   <lambda>   s    z _field_matches.<locals>.<lambda>rD   c                 S   s   g | ]\}}|qS r$   r$   )r,   _field_valuer$   r$   r%   
<listcomp>       z"_field_matches.<locals>.<listcomp>)get	enumerater:   r   nptNDArrayr   sizesortedr/   allclosearrayzipr#   dictrF   r   Metadata)rG   rJ   rK   actual_fieldsorted_fieldfield_valuesexpected_fieldr[   r$   rU   r%   _field_matchest   s:    


rm   c                    s\   t |}| j|d g dd }dd t|d D  t| fddd}||d ks,J dS )	z5The actual embedding ids is equal to the expected idsr7   rL   c                 S   rN   r$   r$   rO   r$   r$   r%   rR      rS   zids_match.<locals>.<dictcomp>c                    s    |  S r)   r$   rQ   rV   r$   r%   rX      s    zids_match.<locals>.<lambda>rY   N)r>   r^   r_   rc   )rG   r'   rJ   
actual_idsr$   ro   r%   	ids_match   s
   rq   c                 C      t |}t| |d dS )z?The actual embedding metadata is equal to the expected metadatar9   Nr>   rm   rG   r'   rJ   r$   r$   r%   metadatas_match      ru   c                 C   rr   )AThe actual embedding documents is equal to the expected documentsr8   Nrs   rt   r$   r$   r%   documents_match   rv   rx   c                 C   rr   )rw   r(   Nrs   rt   r$   r$   r%   embeddings_match   rv   ry   c                 C   s(   |   d }t|tt|ksJ d S )Nr7   )r^   r:   set)rG   r7   r$   r$   r%   no_duplicates   s   r{   querytargetsdistance_fnc                    sN   t j| t jd}t j|t jdt  fddd|}t | | fS )zGReturn the ordered indices and distances from each query to each target)dtypec                    s   t  d| S )N   )r/   apply_along_axis)r|   r~   
np_targetsr$   r%   rX      s    z"_exact_distances.<locals>.<lambda>r   )r/   re   float32r   argsorttolist)r|   r}   r~   np_query	distancesr$   r   r%   _exact_distances   s   r   threadpool_sizec                 C   s   t  }| }d}d}tdd |D d | kr>||k r>t  | }|d7 }td tdd |D d | kr>||k stdd |D d | ksMJ dS )zs
    Checks that the open file descriptors are not exceeding the threadpool size
    works only for SegmentAPI
       r   c                 S      g | ]
}d |j v r|j qS sqlite3pathr,   pr$   r$   r%   r\          z4fd_not_exceeding_threadpool_size.<locals>.<listcomp>r   c                 S   r   r   r   r   r$   r$   r%   r\      r   N)psutilProcess
open_filesr:   gccollectr
   )r   current_processr   max_retriesretry_countr$   r$   r%    fd_not_exceeding_threadpool_size   s   "r   c                 C   s   d }d| j v r| j d }| jjd u r|S d| jjv r8| jjdd ur8d| jjdv r8| jjdd}|S d| jjv r^| jjdd ur^d| jjdv r^|d u r^| jjdd}|S )Nz
hnsw:spacespannspacehnsw)metadata_modelconfiguration_jsonr^   )rG   r   r$   r$   r%   	get_space   s    

r   r   Gz?	n_results
min_recallembedding_functionquery_indicesquery_embeddingsc              	      s  t |}t|d dkrdS |d   duot dk}|s;|dus$J |d dus,J t|d ts5J ||d  t| }	|	du rGtj}
n|	dkrOtj}
n|	dkrWtj}
n|	dkr^tj}
d	}| j	dusgJ  dusmJ t
d
d  D sxJ t d }|tdtt| }|du r|du r n fdd|D }|d |durdurfdd|D t| |
d\}}| j|r|nd|sǈnd|g dd}t|| |d dusJ |d dusJ |d dusJ |d dusJ |d dusJ dd t|d D }d}tt||D ]\}\}}t|d |d|  }|tt|t|d |  7 }t|d | D ]n\}}||v}|| }tj|| |d | | |d}|rd|rb|d8 }nq;|siJ t | |d | | szJ |d dur|d | |d | | ksJ |d durt|d | |d | |  q;qt|d }|| | }ztd| d| d| d|  W n
 ty   Y nw ||ksJ |d D ]}tt||sJ qdS )zBValidate that the API performs nearest_neighbor searches correctlyr7   r   Nr(   r8   cosineipl2rA   c                 s   s     | ]}t |ttjfV  qd S r)   )r#   r*   r/   r0   r5   r$   r$   r%   r.   ;  s    zann_accuracy.<locals>.<genexpr>
   c                       g | ]} | qS r$   r$   r,   rP   )r(   r$   r%   r\   D  r]   z ann_accuracy.<locals>.<listcomp>c                    r   r$   r$   r   )query_documentsr$   r%   r\   H  r]   )r~   )r(   r8   r9   r   )r   query_textsr   rM   r   r9   c                 S   rN   r$   r$   rO   r$   r$   r%   rR   _  rS   z ann_accuracy.<locals>.<dictcomp>)atolr   z
# recall: z
, missing z out of z, accuracy threshold )r>   r:   r#   r*   r   r   r   r   r   r   r;   mathpowr1   log10r   r|    _query_results_are_correct_shaper_   rf   r/   re   rz   rd   rF   r   r   sort)rG   r'   r   r   r   r   r   rJ   have_embeddingsr   distance_functionaccuracy_thresholddimindicesr   query_resultsid_to_indexmissingrP   	indices_idistances_iexpected_idsjrQ   unexpected_idindexcorrect_distancerb   recalldistance_resultr$   )r(   r   r%   ann_accuracy  s   




 

"


 r   r   c                    s<   dD ]}| | d usJ t  fdd| | D sJ qd S )N)r   r(   r8   r9   c                 3   s    | ]	}t | kV  qd S r)   )r:   )r,   rW   r   r$   r%   r.     s    
z3_query_results_are_correct_shape.<locals>.<genexpr>)r;   )r   r   result_typer$   r   r%   r     s   r   sqlitec                 C   s   t d}|  |}|  '}t|t|j| 	 \}}|
||}tt| d W  d    S 1 s9w   Y  d S )Nembeddings_queuer   )r   querybuilderfrom_txr   selectr   Countseq_idparameter_formatexecuter   r1   fetchone)r   tqcursqlparamsrW   r$   r$   r%   _total_embedding_queue_log_size  s   
$r   systemcollectionshas_collection_mutatedc                 C   s   |  t}| jjs| jjdkrd S |rEt|dksJ tdd |D }tdd |D }| jjdkr5|n|| }t|d |ksCJ d S t|dksMJ d S )N!chromadb.api.rust.RustBindingsAPIr   c                 s   ,    | ]}|j d ur|j ddndV  qd S )Nhnsw:sync_threshold  r   r^   r,   rG   r$   r$   r%   r.         

z%log_size_below_max.<locals>.<genexpr>c                 s   r   Nzhnsw:batch_sized   r   r   r$   r$   r%   r.     r   r   )instancer	   settingsis_persistentchroma_api_implr   sum)r   r   r   r   sync_threshold_sumbatch_size_sumlimitr$   r$   r%   log_size_below_max  s&   
	r   c                    s   |  t}td}| |}| jd | jd fdd|D }| 9}t|	|j
t|jd| \}}|||}	i }
|	 D ]}|d |
||d  j< qL|
W  d    S 1 sfw   Y  d S )	Nr   	tenant_idtopic_namespacec                    s   i | ]
}t  |j|qS r$   )r   rQ   r   _tenant_topic_namespacer$   r%   rR     s    zB_total_embedding_queue_log_size_per_collection.<locals>.<dictcomp>topicr   r   )r   r	   r   r   r   r   requirer   r   r   r   r   r   r   groupbyr   r   fetchallrQ   )r   r   r   r   r   topic_mappingsr   r   r   rW   outresr$   r   r%   ._total_embedding_queue_log_size_per_collection  s&   

$r   c                    s   | j jdkrd S | t}|rJt|dksJ dd |D fdd|D t| | t  t ks:J t fdd|D sHJ d S t|dksRJ d S )	Nr   r   c                 S   s,   i | ]}|j |jd ur|jddndqS r   )rQ   r   r^   r   r$   r$   r%   rR     s    
z;log_size_for_collections_match_expected.<locals>.<dictcomp>c                    s&   i | ]}|j |  |j   d  qS )r   )rQ   rH   r   )r   r$   r%   rR     s    c                 3   s$    | ]} |j  |j  kV  qd S r)   rn   r   )actual_sizesexpected_sizesr$   r%   r.     s
    
z:log_size_for_collections_match_expected.<locals>.<genexpr>r   )	r   r   r   r	   r   r   rz   keysr;   )r   r   r   r   r$   )r   r   r   r%   'log_size_for_collections_match_expected  s&   

r  clientcollection_namec              	   c   s   dd |   D }||v sJ | |}g }t| jtr~| jj}|j|jd}i }d}|D ]=}d||d < |d tj	j
krk|jd urMd|jv rM|jd nd}	| |	krkd}tjtj|  jt|d	 skJ q.|rv|tj	j
 svJ |tjj
 s~J d V  d
d |   D }||vsJ t|dkr| jj}|j|jd}
t|
dksJ |D ] }|d tj	j
krtjtj|  jt|d	 rJ qd S d S )Nc                 S      g | ]}|j qS r$   namer,   cr$   r$   r%   r\         z&collection_deleted.<locals>.<listcomp>)rG   FTtyper   r   rQ   c                 S   r  r$   r  r  r$   r$   r%   r\   <  r
  r   )list_collectionsget_collectionr#   _serverr   _sysdbget_segmentsrQ   r   HNSW_LOCAL_PERSISTEDr!   r   rH   osr   existsjoinget_settingspersist_directorystrSQLITEr:   )r  r  collection_namesrG   segmentssysdbsegment_typesshould_have_hnswsegmentsync_thresholdsegments_afterr$   r$   r%   collection_deleted  s\   



r!  )r   r   NNN)Yr   r   os.pathr  uuidr   
contextlibr   chromadb.api.segmentr   chromadb.db.systemr   chromadb.ingest.impl.utilsr   chromadb.configr   chromadb.db.baser   chromadb.db.impl.sqliter	   timer
   r   chromadb.segmentr   !chromadb.test.property.strategiesr   r   typingr   r   r   r   r   r   r   r   r   typing_extensionsr   numpyr/   numpy.typingr`   chromadb.apir   r   chromadb.api.models.Collectionr   
hypothesisr   hypothesis.errorsr   pypikar   r   chromadb.utilsr   r    r&   r>   rh   rF   rH   rm   rq   ru   rx   ry   r{   r   r<   	ArrayLiker2   r1   r   r   r   EmbeddingFunctionr   QueryResultr   r   boolr   r   r  r  r!  r$   r$   r$   r%   <module>   s    ,"
/
	
7



 


+


%