o
    ?Hh"                     @   s  d Z ddlZddlZddlmZmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZmZ ddlmZmZ dd	lmZ d
d Zdd Zdd Zdd Zejdedd Zdd Zdd Zdd Zdd Zdd Z dd  Z!d!d" Z"d#d$ Z#d%d& Z$d'd( Z%d)d* Z&dS )+z+
Tests for the birch clustering algorithm.
    N)AgglomerativeClusteringBirch)generate_clustered_data)
make_blobs)ConvergenceWarning)pairwise_distances_argminv_measure_score)assert_allcloseassert_array_equal)CSR_CONTAINERSc                 C   s   t d| d\}}|j|dd}t }|| tdd |jjD }tdd | D }||jd ks6J ||jd ks?J d S )	N
   	n_samplesrandom_stateFcopyc                 S   s   g | ]}|j qS  )
n_samples_).0scr   r   `/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/sklearn/cluster/tests/test_birch.py
<listcomp>   s    z/test_n_samples_leaves_roots.<locals>.<listcomp>c                 S   s   g | ]}|j D ]}|jqqS r   )subclusters_r   )r   leafr   r   r   r   r      s    r   )	r   astyper   fitsumroot_r   _get_leavesshape)global_random_seedglobal_dtypeXybrcn_samples_rootn_samples_leavesr   r   r   test_n_samples_leaves_roots   s   
r'   c                 C   s   t d| d\}}|j|dd}tdd}|| td d}||d d  ||dd   t|j|j |jdd |d  t|j	|j	 d S )Nd   r   Fr      
n_clusters2   )
r   r   r   r   partial_fitr	   subcluster_centers_
set_paramsr
   subcluster_labels_)r    r!   r"   r#   r$   brc_partialr   r   r   test_partial_fit   s   



r2   c           	      C   s   t j| }tdddd}|j|dd}t d}|| ||d d f }tddd	}|| |j	j
|ks9J t|j|| |j	}|jt|| }tt||jd d S )
Nr)   r   )r+   
n_featuresn_samples_per_clusterFr         g      ?r+   	threshold)nprandomRandomStater   r   arangeshuffler   r   r.   dtyper
   labels_predictr0   r   r	   r   )	r    r!   rngr"   shuffle_indices	X_shuffler$   	centroidsnearest_centroidr   r   r   test_birch_predict1   s   


rF   c                 C   s   t dd| d\}}|j|dd}tdd}|| t|jdks#J tt|jdks/J t	dd}t|d}|| t
|j|j t
|j|j tdd}tt || W d    d S 1 sgw   Y  d S )	Nr(   r   r   centersr   Fr   r*   g     @)r8   )r   r   r   r   lenr.   r9   uniquer?   r   r
   r0   pytestwarnsr   )r    r!   r"   r#   brc1gcbrc2brc4r   r   r   test_n_clustersI   s   





"rQ   csr_containerc                 C   s   t dd| d\}}|j|dd}tdd}|| ||}tdd}|| |jj|ks0J t|j|j t|j|j d S )Nr(   r   rG   Fr   r*   )	r   r   r   r   r.   r>   r
   r?   r	   )r    r!   rR   r"   r#   r$   csr
brc_sparser   r   r   test_sparse_X`   s   



rU   c                  C   sv   t dd\} }tdd}|| | d}tjt|d || d d dgf | W d    d S 1 s4w   Y  d S )Nr(   )r   r)   r*   z3X has 1 features, but Birch is expecting 2 featuresmatchr   )r   r   r-   rK   raises
ValueError)r"   r#   r$   msgr   r   r   )test_partial_fit_second_call_error_checkss   s   
"r[   c                 C   s6   | j }|t|ksJ |D ]}|jrt|j| qd S )N)r   rI   child_check_branching_factor)nodebranching_factorsubclustersclusterr   r   r   r]      s   r]   c                 C   sl   t | d\}}|j|dd}d}td |dd}|| t|j| td|dd}|| t|j| d S )Nr   Fr   	   g{Gz?)r+   r_   r8   r)   )r   r   r   r   r]   r   )r    r!   r"   r#   r_   r$   r   r   r   test_branching_factor   s   

rd   c                 C   s<   | j j}|r|j}|D ]	}||jksJ q|j}|sdS dS )z&Use the leaf linked list for traversalN)dummy_leaf_
next_leaf_r   radius)birch_instancer8   current_leafr`   r   r   r   r   check_threshold   s   rj   c                 C   sd   t dd| d\}}|j|dd}tdd d}|| t|d tdd d}|| t|d d S )	NP   r6   rG   Fr   g      ?)r8   r+   g      @)r   r   r   r   rj   )r    r!   r"   r#   r$   r   r   r   test_threshold   s   


rl   c                  C   s,   t dd\} }td}t|d|  d S )Nr   rb      r*   )r   r9   int64r   r   )r"   _r+   r   r   r   test_birch_n_clusters_long_int   s   
rp   c                  C   sV   t dddd\} }tdd}||  |jjd }| }tdd t|D | dS )	z*Check `get_feature_names_out` for `Birch`.rk   r6   r   r   r3   r   r*   c                 S   s   g | ]}d | qS )birchr   )r   ir   r   r   r      s    z*test_feature_names_out.<locals>.<listcomp>N)r   r   r   r.   r   get_feature_names_outr
   range)r"   ro   r$   r+   	names_outr   r   r   test_feature_names_out   s   

rw   c                 C   sL   t dd| d\}}tddd}||}||tj}t||dd d S )Nrk   r6   rq   g?r7   gư>)atol)r   r   fit_transformr   r9   float32r	   )r    r"   ro   r$   Y_64Y_32r   r   r   "test_transform_match_across_dtypes   s
   
r}   c                 C   s@   t ddddd j| dd}tdd}||jj| ksJ d S )Nrk   r6   r   rq   Fr   r*   )r   r   r   r   r.   r>   )r!   r"   r$   r   r   r   test_subcluster_dtype   s
   
r~   c                  C   s   t jddgddgddgddgd	d
gddgddgddgddgddgddgddgddgd	d
gddgd	d
gddgddgddgddgddggt jd} tdddd|  dS )zCheck that both subclusters are updated when a node a split, even when there are
    duplicated data points. Non-regression test for #23269.
    g@̙Hg\sGg%g44Tg}Thg\Fgzog/IwgY+ctEgBlCg}
gr'g_ guXgaeR^g	AX7g.^g|SAgkg^#g@cgi8K)r>   rm   gh㈵>N)r_   r8   r+   )r9   arrayrz   r   r   )r"   r   r   r   test_both_subclusters_updated   s4   r   c                  C   s\   t dddd\} }tddd}tjtdd ||  W d    d S 1 s'w   Y  d S )	Nrk   r6   r   rq   T)r+   r   z`copy` was deprecatedrV   )r   r   rK   rL   FutureWarningr   )r"   ro   r$   r   r   r   test_birch_copy_deprecated   s
   "r   )'__doc__numpyr9   rK   sklearn.clusterr   r   sklearn.cluster.tests.commonr   sklearn.datasetsr   sklearn.exceptionsr   sklearn.metricsr   r   sklearn.utils._testingr	   r
   sklearn.utils.fixesr   r'   r2   rF   rQ   markparametrizerU   r[   r]   rd   rj   rl   rp   rw   r}   r~   r   r   r   r   r   r   <module>   s6    

	%