o
    *Tht                     @   s  d dl mZmZmZmZ d dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZm Z m!Z!m"Z" d dl#m$Z$m%Z%m&Z& d dl'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z2 d dl3m4Z4 d dl5m6Z6m7Z7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZK d dlLmMZMmNZNmOZOmPZPmQZQmRZR d dl0mSZT d dlUmVZVmWZWmXZXmYZYmZZZm[Z[m\Z\ d dl]m^Z^ d dl_m`Z`maZa d dlbmcZc d dldZdd dleZed dlfZfe\de[deZf dZgeeheiZjdekddfdd Zld!egdegfd"d#ZmG d$d% d%eZn				d.d&eTjod'e;d(eVe< d)eVe= d*eVe> d+eVe? deXeTjpddf fd,d-ZqdS )/    )retrystop_after_attemptretry_if_exception
wait_fixed)	ServerAPI)CreateCollectionConfigurationUpdateCollectionConfiguration'create_collection_configuration_to_json)UserIdentity)DEFAULT_DATABASEDEFAULT_TENANTSettingsSystem)SysDB)QuotaEnforcerAction)RateLimitEnforcer)SegmentManager)Executor)ScanFilterLimitKNN
Projection)	CountPlanGetPlanKNNPlan)add_attributes_to_current_spanOpenTelemetryClientOpenTelemetryGranularitytrace_method)ProductTelemetryClient)Producer)
Collection__version__)InvalidDimensionExceptionNotFoundErrorVersionMismatchError)CollectionMetadataIDs
Embeddings	Metadatas	DocumentsURIsWhereWhereDocumentInclude	GetResultQueryResultvalidate_metadatavalidate_update_metadatavalidate_wherevalidate_where_documentvalidate_batchIncludeMetadataDocuments!IncludeMetadataDocumentsDistances)CollectionAddEventCollectionDeleteEventCollectionGetEventCollectionUpdateEventCollectionQueryEventClientCreateCollectionEventN)OptionalSequence	GeneratorListAnyCallableTypeVar)override)UUIDuuid4)wrapsT.)bound
index_namereturnc                 C   sf   d|  }t | dk st | dkrt|td| st|d| v r't|td| r1t|d S )Na  Expected collection name that (1) contains 3-63 characters, (2) starts and ends with an alphanumeric character, (3) otherwise contains only alphanumeric characters, underscores or hyphens (-), (4) contains no two consecutive periods (..) and (5) is not a valid IPv4 address, got    ?   z'^[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]$z..z0^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$)len
ValueErrorrematch)rN   msg rW   U/home/air/segue/gemini/back/venv/lib/python3.10/site-packages/chromadb/api/segment.pycheck_index_nameT   s   	rY   funcc                    s&   t  dtdtdtf fdd}|S )NargskwargsrO   c                     s   | d }|j  | i |S )Nr   )_rate_limit_enforcer
rate_limit)r[   r\   selfrZ   rW   rX   wrapperi   s   zrate_limit.<locals>.wrapper)rK   rE   )rZ   ra   rW   r`   rX   r^   h   s   r^   c                       st  e Zd ZU dZeed< eed< eed< eed< e	ed< e
ed< eed< eed	< eed
< eed< def fddZedefddZedejeefdededdfddZedejeefdededejfddZedejeefdededdfddZedejeddefdee dee dedeej fddZedejededdfd dZede fd!d"Z!ed#ejededej"fd$d#Z#ed%ejee$ddd&ee%fded'ee& d(ee' d)e(ded*ede)fd+d%Z*ed,ejee$ddee%fded'ee& d(ee' ded*ede)fd-d,Z+ed.ejee$dee%fdee ded*ede)fd/d.Z,ed0ejee$ddee%fdee dee ded*edee) f
d1d2Z-ed3ejee$ee%fded*edefd4d3Z.ed5ejee$dddee%fd6e/d7ee d8ee' d9ee0 ded*eddfd:d5Z1eee%fd;e/d7eded*ede)f
d<d=Z2ed>ejee$ee%fdeded*eddfd?d>Z3ed@ejee$dddee%fdAe4d;e/dBe5dCee6 dDee7 dEee8 ded*ede(fdFd@Z9edGejee$ddddee%fd;e/dAe4dBee5 dCee6 dDee7 dEee8 ded*ede(fdHdGZ:edIejee$dddee%fd;e/dAe4dBe5dCee6 dDee7 dEee8 ded*ede(fdJdIZ;edKeje<e=dLdM e>dNe?dOdPdQee$ddddde@ee%fd;e/dAee4 dReeA dee dee dSeeB dTeCded*edeDfdUdKZEedVejee$dddee%fd;e/dAee4 dReeA dSeeB ded*eddfdWdVZFedXeje<e=dYdM e>dNe?dOdPdQee$ee%fd;e/ded*edefdZdXZGed[eje<e=d\dM e>dNe?dOdPdQee$dd]ddeHee%fd;e/d^e5dAee4 d_edReeA dSeeB dTeCded*edeIfd`d[ZJedaejee$d]ee%fd;e/dbeded*edeDf
dcdaZKedefdddeZLedzdfdgZMede(fdhdiZNedefdjdkZOedefdldmZPednejQdoejRdpeSejT ddfdqdnZUdoejRdredse(ddfdtduZVedvejQd;e/dejRfdwdvZWedxejd;e/deXfdydxZY  ZZS ){
SegmentAPIzHAPI implementation utilizing the new segment-based internal architecture	_settings_sysdb_manager	_executor	_producer_product_telemetry_client_opentelemetry_client
_tenant_id	_topic_nsr]   systemc                    sz   t  | |j| _| t| _| t| _| t	| _
| t| _| t| _| t| _| t| _| jt| _d S N)super__init__settingsrc   requirer   rd   r   re   r   rf   r   _quota_enforcerr!   rh   r   ri   r"   rg   _systemr   r]   )r_   rl   	__class__rW   rX   ro      s   zSegmentAPI.__init__rO   c                 C   s   t t S rm   )inttimetime_nsr_   rW   rW   rX   	heartbeat   s   zSegmentAPI.heartbeatzSegmentAPI.create_databasenametenantNc                 C   s@   t |dk r
td| jjtj||d | jjt ||d d S )NrP   z0Database name must be at least 3 characters long)actionr|   r{   )idr{   r|   )	rR   rS   rr   enforcer   CREATE_DATABASErd   create_databaserJ   r_   r{   r|   rW   rW   rX   r      s   
zSegmentAPI.get_databasec                 C      | j j||dS N)r{   r|   )rd   get_databaser   rW   rW   rX   r      s   zSegmentAPI.delete_databasec                 C   s   | j j||d d S r   )rd   delete_databaser   rW   rW   rX   r      s   zSegmentAPI.list_databaseslimitoffsetc                 C   s   | j j|||dS )N)r   r   r|   )rd   list_databases)r_   r   r   r|   rW   rW   rX   r      s   zSegmentAPI.create_tenantc                 C   s&   t |dk r
td| jj|d d S )NrP   z.Tenant name must be at least 3 characters longr{   )rR   rS   rd   create_tenantr_   r{   rW   rW   rX   r      s
   
c                 C   s   t dttgdS )N )user_idr|   	databases)r
   r   r   ry   rW   rW   rX   get_user_identity   s
   zSegmentAPI.get_user_identityzSegmentAPI.get_tenantc                 C   s   | j j|dS )Nr   )rd   
get_tenantr   rW   rW   rX   r      s   zSegmentAPI.create_collectionFconfigurationmetadataget_or_createdatabasec                 C   s   |d urt | t| | jjtj|||d t }t|||t|p#t	 ||d d}| j
j|j|j|p5t	 g |jd |||d	\}	}
|
rU| j|	}|D ]}| j
| qKn	td| d | jtt|d tdt|i |	S )Nr}   r|   r{   r   )r~   r{   r   configuration_jsonr|   r   	dimension)	r~   r{   r   segmentsr   r   r   r|   r   Collection z/ already exists, returning existing collection.)collection_uuidr   )r4   rY   rr   r   r   CREATE_COLLECTIONrJ   CollectionModelr	   r   rd   create_collectionr~   r{   r   re   #prepare_segments_for_new_collectioncreate_segmentloggerdebugrh   capturer@   strr   )r_   r{   r   r   r   r|   r   r~   modelcollcreatedr   segmentrW   rW   rX   r      s^   

z#SegmentAPI.get_or_create_collectionc                 C   s   | j |||d||dS )NT)r{   r   r   r   r|   r   )r   )r_   r{   r   r   r|   r   rW   rW   rX   get_or_create_collection  s   zSegmentAPI.get_collectionc                 C   s.   | j j|||d}|r|d S td| d)Nr{   r|   r   r   r    does not exist.)rd   get_collectionsr'   r_   r{   r|   r   existingrW   rW   rX   get_collection1  s   	zSegmentAPI.list_collectionc                 C   s(   | j jtj||d | jj||||dS )N)r}   r|   r   )r   r   r|   r   )rr   r   r   LIST_COLLECTIONSrd   r   )r_   r   r   r|   r   rW   rW   rX   list_collectionsC  s   
zSegmentAPI.list_collectionszSegmentAPI.count_collectionsc                 C   r   )Nr|   r   )rd   count_collections)r_   r|   r   rW   rW   rX   r   W  s   zSegmentAPI._modifyr~   new_namenew_metadatanew_configurationc                 C   s  |rt | |rt| | |}| jjtj|||d |r.|r.|r.| jj||||d d S |r=|r=| jj|||d d S |rL|rL| jj|||d d S |r[|r[| jj|||d d S |rg| jj||d d S |rs| jj||d d S |r| jj||d d S d S )	Nr   )r{   r   r   )r{   r   )r{   r   )r   r   r   )r   )r   )	rY   r5   _get_collectionrr   r   r   UPDATE_COLLECTIONrd   update_collection)r_   r~   r   r   r   r|   r   _rW   rW   rX   _modifya  sF   
	


collection_idc                 C   s   t d)Nz4Collection forking is not implemented for SegmentAPI)NotImplementedError)r_   r   r   r|   r   rW   rW   rX   _fork  s   zSegmentAPI._forkzSegmentAPI.delete_collectionc                 C   sT   | j j|||d}|r"| j|d j | j j|d j||d d S td| d)Nr   r   r   r   r   )rd   r   re   delete_segmentsr~   delete_collectionrS   r   rW   rW   rX   r     s   	
zSegmentAPI._addids
embeddings	metadatas	documentsurisc	              
   C   s   |  |}	| j|tjj t|||||fd|  i tt	tjj|||||d}
| 
|	|
 | jjtj|||||||d | j||
 | jtt|t||d urXt|nd|d urat|nd|d urjt|ndd dS )Nmax_batch_sizer   r   r   r   r   r}   r|   r   r   r   r   r   r   r   )r   
add_amountwith_metadatawith_documents	with_urisT)r   re   hint_use_collectiont	OperationADDr8   get_max_batch_sizelist_records_validate_embedding_record_setrr   r   r   rg   submit_embeddingsrh   r   r;   r   rR   )r_   r   r   r   r   r   r   r|   r   r   records_to_submitrW   rW   rX   _add  sL   


	zSegmentAPI._updatec	              
   C   s   |  |}	| j|tjj t|||||fd|  i tt	tjj|||||d}
| 
|	|
 | jjtj||||||d | j||
 | jtt|t||rUt|nd|r\t|nd|rct|nd|rjt|ndd dS )Nr   r   )r}   r|   r   r   r   r   r   r   )r   update_amountwith_embeddingsr   r   r   T)r   re   r   r   r   UPDATEr8   r   r   r   r   rr   r   r   rg   r   rh   r   r>   r   rR   r_   r   r   r   r   r   r   r|   r   r   r   rW   rW   rX   _update  sL   



zSegmentAPI._upsertc	              
   C   s   |  |}	| j|tjj t|||||fd|  i tt	tjj|||||d}
| 
|	|
 | jjtj|||||||d | j||
 dS )Nr   r   r   T)r   re   r   r   r   UPSERTr8   r   r   r   r   rr   r   r   rg   r   r   rW   rW   rX   _upsert*  s:   


zSegmentAPI._getc                 C   
   t | tS rm   
isinstancer(   erW   rW   rX   <lambda>[     
 zSegmentAPI.<lambda>      T)r   waitstopreraisewherewhere_documentincludec
                 C   s  t t||r
t|ndd | |}
|d urt| |d ur$t| | jjtj	|||||d |r7t|nd}| j
tt|||rE|ndd|v rL|ndd|v rS|ndd|v rZ|ndd | jt|
t|||t|pmd|td|v d|v d|v d	d|v S )
Nr   r   	ids_count)r}   r|   r   r   r   r   r   r   r   )r   r   r   include_metadatainclude_documentsinclude_urisr   F)r   r   rR   _scanr6   r7   rr   r   r   GETrh   r   r=   rf   getr   r   r   r   )r_   r   r   r   r   r   r   r   r|   r   scan
ids_amountrW   rW   rX   _getY  sV   
	

zSegmentAPI._deletec           
      C   s^  t t||r
t|ndd |d urt| |d urt| |d u s-|d urMt|dkrM|d u s;|d urMt|dkrM|d u sI|d urMt|dkrMtd| |}| jjt	j
||||d | j|tjj
 |sm|sm|s}| jt|t|||d }n|}t|dkrd S tttjj
|d}	| |j|	 | j||	 | jtt|t|d d S )Nr   r   aU  
                You must provide either ids, where, or where_document to delete. If
                you want to delete all data in a collection you can delete the
                collection itself using the delete_collection method. Or alternatively,
                you can get() all the relevant ids and then delete them.
                )r}   r|   r   r   r   r   )	operationr   )r   delete_amount)r   r   rR   r6   r7   rS   r   rr   r   r   DELETEre   r   r   r   rf   r   r   r   r   r   r   
collectionrg   r   rh   r   r<   )
r_   r   r   r   r   r|   r   r   ids_to_deleter   rW   rW   rX   _delete  sX   
	zSegmentAPI._countc                 C   r   rm   r   r   rW   rW   rX   r     r   c                 C   s&   t dt|i | jt| |S )Nr   )r   r   rf   countr   r   )r_   r   r|   r   rW   rW   rX   _count  s   zSegmentAPI._queryc                 C   r   rm   r   r   rW   rW   rX   r     r   
   query_embeddings	n_resultsc
                 C   sH  t t||t|d t|}
|rt|nd}| jtt||
|||d ur(|
nd|d ur/|
ndd|v r6|
ndd|v r=|
ndd|v rD|
ndd|v rK|
ndd
 |d urXt| |d ur`t| | |}|D ]}| j	|j
t|dd	 qg| jjtj|||||d
 | jt|t||td ||td|v d|v d|v d|v d|v S )N)r   r  r   r   r   r   r   	distances)
r   query_amountfiltered_ids_amountr  with_metadata_filterwith_document_filterinclude_metadatasr   r   include_distancesFupdate)r}   r|   r   r   r  r  r   )r   r   rR   rh   r   r?   r6   r7   r   _validate_dimensionr   rr   r   r   QUERYrf   knnr   r   r   r   )r_   r   r  r   r  r   r   r   r|   r   r  r   r   	embeddingrW   rW   rX   _query  sf   
	
zSegmentAPI._peeknc                 C   s   t dt|i | j||dS )Nr   )r   )r   r   r   )r_   r   r  r|   r   rW   rW   rX   _peekY  s   
c                 C   s   t S rm   r$   ry   rW   rW   rX   get_versionf     zSegmentAPI.get_versionc                 C   s   d S rm   rW   ry   rW   rW   rX   reset_statej  r  zSegmentAPI.reset_statec                 C   s   | j   dS )NT)rs   r  ry   rW   rW   rX   resetn  s   
zSegmentAPI.resetc                 C   s   | j S rm   )rc   ry   rW   rW   rX   get_settingss  s   zSegmentAPI.get_settingsc                 C   s   | j jS rm   )rg   r   ry   rW   rW   rX   r   w  s   zSegmentAPI.get_max_batch_sizez)SegmentAPI._validate_embedding_record_setr   recordsc                 C   sF   t dt|d i |D ]}|d dur | j|t|d dd qdS )zQValidate the dimension of an embedding record before submitting it to the system.r   r~   r  NTr  )r   r   r  rR   )r_   r   r  recordrW   rW   rX   r     s   dimr  c                 C   s^   |d du r|r|j }| jj||d ||d< dS dS |d |kr-td| d|d  dS )zValidate that a collection supports records of the given dimension. If update
        is true, update the collection if the collection doesn't already have a
        dimension.r   N)r~   r   zEmbedding dimension z* does not match collection dimensionality )r~   rd   r   r&   )r_   r   r  r  r~   rW   rW   rX   r    s   zSegmentAPI._validate_dimensionzSegmentAPI._get_collectionc                 C   s6   | j j|d}|rt|dkrtd| d|d S )N)r~   r   r   r   )rd   r   rR   r'   )r_   r   collectionsrW   rW   rX   r     s   zSegmentAPI._scanc                 C   sN   | j |}dd |d D }t|d |tjj |tjj |tjjd dS )Nc                 S   s   i | ]}|d  |qS )scoperW   ).0r   rW   rW   rX   
<dictcomp>  s    
z$SegmentAPI._scan.<locals>.<dictcomp>r   r   )r   r  r   r  )	rd   get_collection_with_segmentsr   r   SegmentScopeVECTORMETADATAr   RECORD)r_   r   collection_and_segmentsscope_to_segmentrW   rW   rX   r     s   

)rO   N)[__name__
__module____qualname____doc__r   __annotations__r   r   r   r"   r!   r   r   r   r   ro   rH   rv   rz   r    r   	OPERATIONr   r   r   Databaser   r   rA   rB   r   r   r
   r   Tenantr   r^   r   r   r)   boolr   r   r   r   r   r   rI   r   r   r   r   r*   r+   r,   r-   r.   r   r   r   r   r   r   r   r9   r/   r0   r1   r2   r   r   r   r:   r3   r  r  r  r  r  r  r   ALLr#   rD   OperationRecordr   r  r   r   r   __classcell__rW   rW   rt   rX   rb   q   sj  
 

 




F



4

	

5	

6	

,
	

@
K

	
	

G



rb   r   r   r   r   r   r   c                 c   s    |g krd}t |D ]M\}}d}|r|| }|r,|| }	|r(i |d|	i}nd|	i}|rA|| }
|r=i |d|
i}nd|
i}tj||durL|| ndtjj|| d}|V  qdS )ziConvert parallel lists of embeddings, metadatas and documents to a sequence of
    SubmitEmbeddingRecordsNzchroma:documentz
chroma:uri)r~   r  encodingr   r   )	enumerater   r1  ScalarEncodingFLOAT32)r   r   r   r   r   r   ir~   r   documenturir  rW   rW   rX   r     s4   r   )NNNN)rtenacityr   r   r   r   chromadb.apir   %chromadb.api.collection_configurationr   r   r	   chromadb.authr
   chromadb.configr   r   r   r   chromadb.db.systemr   chromadb.quotar   r   chromadb.rate_limitr   chromadb.segmentr   $chromadb.execution.executor.abstractr   &chromadb.execution.expression.operatorr   r   r   r   r   "chromadb.execution.expression.planr   r   r    chromadb.telemetry.opentelemetryr   r   r   r    chromadb.telemetry.productr!   chromadb.ingestr"   chromadb.typesr#   r   chromadbr%   chromadb.errorsr&   r'   r(   chromadb.api.typesr)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   !chromadb.telemetry.product.eventsr;   r<   r=   r>   r?   r@   typesr   typingrA   rB   rC   rD   rE   rF   rG   	overridesrH   uuidrI   rJ   	functoolsrK   rw   loggingrT   rL   	getLoggerr'  r   r   rY   r^   rb   r   r1  r   rW   rW   rW   rX   <module>   st    P 	$	
	      S