o
    1Xxi݆                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$ d dl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z? d dlm@ZA d dlBmCZCmDZDmEZEmFZFmGZGmHZHmIZI d dlJmKZK d dlLmMZMmNZN d dlOZOd dlPZPd dlQZQePReSZTdeUddfddZVG dd deZW				d+deAjXde)deCe* deCe, deCe- deCe. deEeAjYddf fd d!ZZd"eCeAj[ deCeU fd#d$Z\d"eCeAj[ deCeU fd%d&Z]d"eFeCeAj[  deFeCeAj[  fd'd(Z^d"eCeAj[ deCeAj[ fd)d*Z_dS ),    )	ServerAPI)CollectionConfigurationInternal)DEFAULT_DATABASEDEFAULT_TENANTSettingsSystem)SysDB)QuotaEnforcerResource)
rate_limit)SegmentManagerMetadataReaderVectorReader)add_attributes_to_current_spanOpenTelemetryClientOpenTelemetryGranularitytrace_method)ProductTelemetryClient)Producer)
Collection__version__)InvalidDimensionExceptionInvalidCollectionException)URICollectionMetadataDocumentIDs
Embeddings	Embedding	Metadatas	DocumentsURIsWhereWhereDocumentInclude	GetResultQueryResultvalidate_metadatavalidate_update_metadatavalidate_wherevalidate_where_documentvalidate_batch)CollectionAddEventCollectionDeleteEventCollectionGetEventCollectionUpdateEventCollectionQueryEventClientCreateCollectionEventN)OptionalSequence	GeneratorListcastSetDict)override)UUIDuuid4
index_namereturnc                 C   sf   d|  }t | dk st | dkrt|td| st|d| v r't|td| r1t|d S )Na  Expected collection name that (1) contains 3-63 characters, (2) starts and ends with an alphanumeric character, (3) otherwise contains only alphanumeric characters, underscores or hyphens (-), (4) contains no two consecutive periods (..) and (5) is not a valid IPv4 address, got    ?   z'^[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]$z..z0^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$)len
ValueErrorrematch)r=   msg rF   S/home/air/biblejyuku/back/venv/lib/python3.10/site-packages/chromadb/api/segment.pycheck_index_name>   s   	rH   c                       s   e Zd ZU dZeed< eed< eed< eed< e	ed< e
ed< eed< eed	< eeejf ed
< def fddZedefddZedejeefdededdfddZedejeefdededejfddZedejededdfddZedejededejfddZedejedddeefdede e! de e" d e#ded!ede$fd"dZ%ed#ejeddeefdede e! de e" ded!ede$fd$d#Z&ed%ejeddeefde e d&e e ded!ede$f
d'd%Z'ed(ejeddeefd)e e d*e e ded!ede(e$ f
d+d,Z)ed-ejeeefded!edefd.d-Z*ed/eje		dnd&ed0e e d1e e" ddfd2d/Z+ed3ejeeefdeded!eddfd4d3Z,ed5eje-d6e.j/d7e			dod8e0d6ed9e1d:e e2 d;e e3 d<e e4 de#fd=d5Z5ed>eje				dpd6ed8e0d9e e1 d:e e2 d;e e3 d<e e4 de#fd?d>Z6ed@eje			dod6ed8e0d9e1d:e e2 d;e e3 d<e e4 de#fdAd@Z7edBeje-d6e.j8d7edi dddddi g dCf	d6ed8e e0 dDe e9 dEe e d)e e d*e e dFe e dGe e dHe e: dIe;de<fdJdBZ=edKeje			dod6ed8e e0 dDe e9 dHe e: de0f
dLdKZ>edMejed6edefdNdMZ?edOeje-d6e.j@d7edPi i g dQfd6edRe1dSedDe9dHe:dIe;deAfdTdOZBedUejedqd6edVede<fdWdUZCedefdXdYZDedrdZd[ZEede#fd\d]ZFedefd^d_ZGedefd`daZHedbejIdcejddejJddfdedbZKedfejIdcejdgedhe#ddfdidfZLedjejId6edejfdkdjZMd6eddfdldmZN  ZOS )s
SegmentAPIzHAPI implementation utilizing the new segment-based internal architecture	_settings_sysdb_manager	_producer_product_telemetry_client_opentelemetry_client
_tenant_id	_topic_ns_collection_cachesystemc                    sf   t  | |j| _| t| _| t| _| t	| _
| t| _| t| _| t| _i | _d S N)super__init__settingsrJ   requirer   rK   r   rL   r	   _quotar   rN   r   rO   r   rM   rR   )selfrS   	__class__rF   rG   rV   _   s   
zSegmentAPI.__init__r>   c                 C   s   t t S rT   )inttimetime_nsrZ   rF   rF   rG   	heartbeatj   s   zSegmentAPI.heartbeatzSegmentAPI.create_databasenametenantNc                 C   s,   t |dk r
td| jjt ||d d S )Nr?   z0Database name must be at least 3 characters long)idrb   rc   )rA   rB   rK   create_databaser<   rZ   rb   rc   rF   rF   rG   re   n   s   
zSegmentAPI.get_databasec                 C   s   | j j||dS )N)rb   rc   )rK   get_databaserf   rF   rF   rG   rg   z   s   zSegmentAPI.create_tenantc                 C   s&   t |dk r
td| jj|d d S )Nr?   z.Tenant name must be at least 3 characters longrb   )rA   rB   rK   create_tenantrZ   rb   rF   rF   rG   ri      s
   
zSegmentAPI.get_tenantc                 C   s   | j j|dS )Nrh   )rK   
get_tenantrj   rF   rF   rG   rk      s   zSegmentAPI.create_collectionFconfigurationmetadataget_or_createdatabasec              
   C   s   |d urt | t| t }t||||d ur|nt ||d d}| jj|j|j|	 |j
d |||d\}	}
|
rJ| j|	}|D ]}| j| q@n	td| d | jtt|d tdt|i |	S )N)rd   rb   rm   rl   rc   ro   	dimension)rd   rb   rl   rm   rp   rn   rc   ro   Collection z/ already exists, returning existing collection.)collection_uuidrr   )r(   rH   r<   CollectionModelr   rK   create_collectionrd   rb   get_configurationrm   rL   create_segmentscreate_segmentloggerdebugrN   capturer2   strr   )rZ   rb   rl   rm   rn   rc   ro   rd   modelcollcreatedsegmentssegmentrF   rF   rG   rt      sP   

z#SegmentAPI.get_or_create_collectionc                 C   s   | j |||d||dS )NT)rb   rm   rl   rn   rc   ro   )rt   )rZ   rb   rl   rm   rc   ro   rF   rF   rG   get_or_create_collection   s   zSegmentAPI.get_collectionrd   c                 C   sX   |d u r|d u s|d ur|d urt d| jj||||d}|r$|d S t d| d)Nz*Name or id must be specified, but not both)rd   rb   rc   ro   r   rq    does not exist.)rB   rK   get_collections)rZ   rb   rd   rc   ro   existingrF   rF   rG   get_collection   s    	zSegmentAPI.list_collectionlimitoffsetc                 C   s   | j j||||dS )N)r   r   rc   ro   )rK   r   )rZ   r   r   rc   ro   rF   rF   rG   list_collections   s   	zSegmentAPI.list_collectionszSegmentAPI.count_collectionsc                 C   s   t | jj||d}|S )Nrc   ro   )rA   rK   r   )rZ   rc   ro   collection_countrF   rF   rG   count_collections
  s   zSegmentAPI._modifynew_namenew_metadatac                 C   st   |rt | |rt| | | |r |r | jj|||d d S |r,| jj||d d S |r8| jj||d d S d S )N)rb   rm   rh   rm   )rH   r)   _validate_collectionrK   update_collection)rZ   rd   r   r   rF   rF   rG   _modify  s   
zSegmentAPI.delete_collectionc                 C   s   | j j|||d}|r@| j j|d j||d | j|d jD ]}| j | q |r<|d j| jv r>| j|d j= d S d S d S td| d)N)rb   rc   ro   r   r   rq   r   )	rK   r   delete_collectionrd   rL   delete_segmentsdelete_segmentrR   rB   )rZ   rb   rc   ro   r   srF   rF   rG   r   1  s   zSegmentAPI._addcollection_id)subjectresourceids
embeddings	metadatas	documentsurisc           
   	   C   s   | j |||t| | |}| j|tjj t	|||||fd| 
 i g }ttjj|||||dD ]}	| ||	 ||	 q5| j|| | jtt|t||d ur\t|nd|d uret|nd|d urnt|ndd dS )Nmax_batch_sizer   r   r   r   r   r   )rr   
add_amountwith_metadatawith_documents	with_urisT)rY   static_checkr{   _get_collectionrL   hint_use_collectiont	OperationADDr,   get_max_batch_size_records_validate_embedding_recordappendrM   submit_embeddingsrN   rz   r-   rA   )
rZ   r   r   r   r   r   r   r}   records_to_submitrrF   rF   rG   _addH  s:   


	zSegmentAPI._updatec           
   
   C   s   | j |||t| | |}| j|tjj t	|||||fd| 
 i g }ttjj|||||dD ]}	| ||	 ||	 q5| j|| | jtt|t||rZt|nd|rat|nd|rht|nd|rot|ndd dS )Nr   r   r   )rr   update_amountwith_embeddingsr   r   r   T)rY   r   r{   r   rL   r   r   r   UPDATEr,   r   r   r   r   rM   r   rN   rz   r0   rA   
rZ   r   r   r   r   r   r   r}   r   r   rF   rF   rG   _updates  s<   


zSegmentAPI._upsertc           
      C   s   | j |||t| | |}| j|tjj t	|||||fd| 
 i g }ttjj|||||dD ]}	| ||	 ||	 q5| j|| dS )Nr   r   T)rY   r   r{   r   rL   r   r   r   UPSERTr,   r   r   r   r   rM   r   r   rF   rF   rG   _upsert  s(   


zSegmentAPI._get)r   r   r   wheresortpage	page_sizewhere_documentincludec              
   C   sF  t t||r
t|ndd | | |d ur"t|dkr"t|nd }|	d ur2t|	dkr2t|	nd }	| j|t}|d urCt	d|rO|rO|d | }|}|j
||	|||d}t|dkrtg d|
v rgg nd d|
v rng nd d|
v rug nd d	|
v r|g nd d
|
v rg |
dS d |
dS g }d|
v rdd |D }| j|t}|j|d}dd |D }d|
v rdd |D }d	|
v rdd |D }|rt|nd}| jtt|||r|ndd|
v r|ndd|
v r|ndd	|
v r|ndd tdd |D d|
v rdd |D nd d|
v rt|nd d|
v r|nd d	|
v r|nd d |
dS )Nr   r   	ids_countzSorting is not yet supported   )r   r   r   r   r   r   r   r   r   data)r   r   r   r   r   r   includedc                 S      g | ]}|d  qS rd   rF   .0r   rF   rF   rG   
<listcomp>      z#SegmentAPI._get.<locals>.<listcomp>r   c                 S   r   r   rF   r   rF   rF   rG   r     r   c                 S      g | ]}t |qS rF   _docr   mrF   rF   rG   r   
  r   c                 S   r   rF   _urir   rF   rF   rG   r     r   )rr   r   r   include_metadatainclude_documentsinclude_urisc                 S   r   r   rF   r   rF   rF   rG   r     r   c                 S   r   	embeddingrF   r   rF   rF   rG   r     r   )r   r{   rA   r   r*   r+   rL   get_segmentr   NotImplementedErrorget_metadatar&   r   get_vectorsrN   rz   r/   _clean_metadatas)rZ   r   r   r   r   r   r   r   r   r   r   metadata_segmentrecordsvectors
vector_idsvector_segmentr   r   r   
ids_amountrF   rF   rG   _get  s   
 




zSegmentAPI._deletec                 C   s  t t||r
t|ndd |d urt|dkrt|nd }|d ur-t|dkr-t|nd }|d u s=|d ur]t|dkr]|d u sK|d ur]t|dkr]|d u sY|d ur]t|dkr]td| |}| j|t	j
j |sq|sq|s| j|t}|j|||d}dd |D }n|}t|dkrg S g }	tt	j
j|dD ]}
| ||
 |	|
 q| j||	 | jtt|t|d |S )	Nr   r   aU  
                You must provide either ids, where, or where_document to delete. If
                you want to delete all data in a collection you can delete the
                collection itself using the delete_collection method. Or alternatively,
                you can get() all the relevant ids and then delete them.
                )r   r   r   c                 S   r   r   rF   r   rF   rF   rG   r   Z  r   z&SegmentAPI._delete.<locals>.<listcomp>)	operationr   )rr   delete_amount)r   r{   rA   r*   r+   rB   r   rL   r   r   r   DELETEr   r   r   r   r   r   rM   r   rN   rz   r.   )rZ   r   r   r   r   r}   r   r   ids_to_deleter   r   rF   rF   rG   _delete)  sN   	 
	zSegmentAPI._countc                 C   s0   t dt|i | | | j|t}| S )Nr   )r   r{   r   rL   r   r   count)rZ   r   r   rF   rF   rG   _countn  s   
zSegmentAPI._query
   )r   r   	distancesquery_embeddings	n_resultsc                    s  t t||t|d t|}| jtt||||d ur|nd|d ur&|ndd|v r-|ndd|v r4|ndd|v r;|ndd|v rB|ndd	 |d urUt|dkrUt|n|}|d uret|dkret|n|}d }| |}	|D ]}
| j	|	t|
dd	 qp|s|r| j
|t}|j||d
}dd |D }g }g }g }g }g }g }|d ur|g krtt|D ]4}|g  d|v r|g  d|v r|g  d|v r|g  d|v r|g  d|v r|g  qntj|||d|v d d}| j
|t}||}|D ]+}|dd |D  d|v r|dd |D  d|v r*|dd |D  q d|v s;d|v s;d|v rt }|D ]}|| q@| j
|t}|jt|d}dd |D  |D ]:} fdd|D }d|v rx|t| d|v rdd |D }|| d|v rdd |D }|| qat||r|nd |r|nd |r|nd |r|nd |r|nd d |dS )N)r   r   r   r   r   r   r   r   )	rr   query_amountr   with_metadata_filterwith_document_filterinclude_metadatasr   r   include_distancesFupdate)r   r   c                 S   r   r   rF   r   rF   rF   rG   r     r   z%SegmentAPI._query.<locals>.<listcomp>r   )r   kallowed_idsinclude_embeddingsoptionsc                 S   r   r   rF   r   rF   rF   rG   r     r   c                 S   r   )distancerF   r   rF   rF   rG   r     r   c                 S   s   g | ]	}t t|d  qS r   )r7   r   r   rF   rF   rG   r         r   c                 S   s   i | ]	}|d  |d qS )rd   rm   rF   r   rF   rF   rG   
<dictcomp>  r   z%SegmentAPI._query.<locals>.<dictcomp>c                    s   g | ]}  |d qS rT   )get)r   rd   metadata_by_idrF   rG   r     s    c                 S   r   rF   r   r   rF   rF   rG   r     r   c                 S   r   rF   r   r   rF   rF   rG   r     r   )r   r   r   r   r   r   r   r   )r   r{   rA   rN   rz   r1   r*   r+   r   _validate_dimensionrL   r   r   r   ranger   r   VectorQueryr   query_vectorssetr   listr   r'   )rZ   r   r   r   r   r   r   r   r   r}   r   metadata_readerr   r   r   r   r   r   r   emqueryvector_readerresultsresultall_idsid_listmetadata_listdoc_listuri_listrF   r   rG   _queryw  s    















zSegmentAPI._peeknc                 C   s   t dt|i | j||dS )Nr   )r   )r   r{   r   )rZ   r   r  rF   rF   rG   _peek  s   c                 C   s   t S rT   r   r`   rF   rF   rG   get_version  s   zSegmentAPI.get_versionc                 C   s
   i | _ d S rT   )rR   r`   rF   rF   rG   reset_state	  s   
zSegmentAPI.reset_statec                 C   s   | j   dS )NT)_systemr  r`   rF   rF   rG   reset  s   
zSegmentAPI.resetc                 C   s   | j S rT   )rJ   r`   rF   rF   rG   get_settings  s   zSegmentAPI.get_settingsc                 C   s   | j jS rT   )rM   r   r`   rF   rF   rG   r     s   zSegmentAPI.get_max_batch_sizez%SegmentAPI._validate_embedding_record
collectionrecordc                 C   s<   t dt|d i |d r| j|t|d dd dS dS )zQValidate the dimension of an embedding record before submitting it to the system.r   rd   r   Tr   N)r   r{   r   rA   )rZ   r  r  rF   rF   rG   r     s   zSegmentAPI._validate_dimensiondimr   c                 C   sd   |d du r|r|j }| jj||d || j| d< dS dS |d |kr0td| d|d  dS )zValidate that a collection supports records of the given dimension. If update
        is true, update the collection if the collection doesn't already have a
        dimension.rp   N)rd   rp   zEmbedding dimension z* does not match collection dimensionality )rd   rK   r   rR   r   )rZ   r  r  r   rd   rF   rF   rG   r   '  s   zSegmentAPI._get_collectionc                 C   sD   || j vr| jj|d}|std| d|d | j |< | j | S )z&Read-through cache for collection datar   rq   r   r   )rR   rK   r   r   )rZ   r   collectionsrF   rF   rG   r   :  s   


c                 C   s   |  | d S rT   )r   )rZ   r   rF   rF   rG   r   F  s   zSegmentAPI._validate_collection)NN)NNNNNNN)r   )r>   N)P__name__
__module____qualname____doc__r   __annotations__r   r   r   r   r   r{   r9   r;   r   r   r   rV   r:   r]   ra   r   r   	OPERATIONr   re   Databaserg   ri   Tenantrk   r   r3   r   r   boolrs   rt   r   r   r4   r   r   r   r   r   r
   ADD_PER_MINUTEr   r   r    r!   r"   r   r   r   GET_PER_MINUTEr#   r$   r%   r&   r   r   r   QUERY_PER_MINUTEr'   r  r  r  r  r  r  r   ALLOperationRecordr   r   r   r   __classcell__rF   rF   r[   rG   rI   R   sr  
 


 


>






(
*
	

f
C
 



rI   r   r   r   r   r   r   c                 c   s    t |D ]K\}}d}|r|| }|r&|| }	|r"i |d|	i}nd|	i}|r;|| }
|r7i |d|
i}nd|
i}tj||rD|| ndtjj|| d}|V  qdS )ziConvert parallel lists of embeddings, metadatas and documents to a sequence of
    SubmitEmbeddingRecordsNchroma:document
chroma:uri)rd   r   encodingrm   r   )	enumerater   r*  ScalarEncodingFLOAT32)r   r   r   r   r   r   ird   rm   documenturir  rF   rF   rG   r   J  s0   r   rm   c                 C      | rd| v rt | d S dS )z2Retrieve the document (if any) from a Metadata mapr,  Nr{   r   rF   rF   rG   r   v     r   c                 C   r5  )z-Retrieve the uri (if any) from a Metadata mapr-  Nr6  r   rF   rF   rG   r   ~  r7  r   c                 C   s   dd | D S )zhRemove any chroma-specific metadata keys that the client shouldn't see from a
    list of metadata maps.c                 S   r   rF   )_clean_metadatar   rF   rF   rG   r     r   z$_clean_metadatas.<locals>.<listcomp>rF   r   rF   rF   rG   r     s   r   c                 C   sD   | sdS i }|   D ]\}}|ds|||< q
t|dkr dS |S )z_Remove any chroma-specific metadata keys that the client shouldn't see from a
    metadata map.Nzchroma:r   )items
startswithrA   )rm   r
  r   vrF   rF   rG   r8    s   
r8  r  )`chromadb.apir   chromadb.api.configurationr   chromadb.configr   r   r   r   chromadb.db.systemr   chromadb.quotar	   r
   chromadb.rate_limitingr   chromadb.segmentr   r   r    chromadb.telemetry.opentelemetryr   r   r   r   chromadb.telemetry.productr   chromadb.ingestr   chromadb.typesr   rs   chromadbr   chromadb.errorsr   r   chromadb.api.typesr   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   !chromadb.telemetry.product.eventsr-   r.   r/   r0   r1   r2   typesr   typingr3   r4   r5   r6   r7   r8   r9   	overridesr:   uuidr;   r<   r^   loggingrC   	getLoggerr  rx   r{   rH   rI   r   r*  r   Metadatar   r   r   r8  rF   rF   rF   rG   <module>   st    T 	$
      
,
"