o
    ?Hhv}                  	   @   s  d Z ddlZddlZddlmZ ddlmZ ddlZddl	Z	ddl
mZ ddlmZ ddlmZmZmZ ddlmZmZmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7 dd Z8dd Z9dd Z:dd Z;dd Z<e	j=>ddd ge	j=>d!d"d#ge	j=>d$g d%d&d' Z?e	j=>d(e7d)d* Z@d+d, ZAd-d. ZBd/d0 ZCd1d2 ZDd3d4 ZEd5d6 ZFe	j=>d7e+d8d9 ZGd:d; ZHd<d= ZId>d? ZJd@dA ZKdBdC ZLdDdE ZMdFdG ZNdHdI ZOdJdK ZPdLdM ZQdNdO ZRe	j=>d$g dPdQdR ZSdSdT ZTdUdV ZUe	j=>d$g dPe	j=>dWdXdYdgfdZdYdgfd[ddgfgd\d] ZVd^d_ ZWd`da ZXdbdc ZYdS )dz=
Several basic tests for hierarchical clustering procedures

    N)partial)mkdtemp)	hierarchy)connected_components)AgglomerativeClusteringFeatureAgglomeration	ward_tree)_TREE_BUILDERS_fix_connectivity_hc_cutlinkage_tree)average_merge	max_mergemst_linkage_core)make_circles
make_moons)grid_to_graph)DistanceMetric)adjusted_rand_scorenormalized_mutual_info_score)PAIRED_DISTANCEScosine_distancesmanhattan_distancespairwise_distances)METRICS_DEFAULT_PARAMS)kneighbors_graph)IntFloatDict)assert_almost_equalassert_array_almost_equalassert_array_equalcreate_memmap_backed_dataignore_warnings)LIL_CONTAINERSc                  C   s   t jd} | jdd}tt t|dd W d    n1 s"w   Y  tt t|t dd W d    n1 s@w   Y  t	 
| t|}t|dd	}t|d
 t|dd	d
  t|td	}t|d
 t|dd	d
  d S )N*   )   r$   sizefoo)linkage   r*   connectivityprecomputedaffinityr   cosine	manhattan)nprandomRandomStatenormalpytestraises
ValueErrorr   onesr   fitr   r   r   )rngXdisres r?   g/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/sklearn/cluster/tests/test_hierarchical.pytest_linkage_misc6   s   rA   c            
   	   C   s  t jd} t jddgtd}d|ddddf< | dd}t|j }t	 D ]_}||j
|d\}}}}d	|jd
  d
 }	t|| |	ksHJ tt ||j
t dd W d    n1 sbw   Y  tt ||j
d d |d W d    n1 sw   Y  q(d S )Nr   
   dtyper*      2   d   r+         r)   )r2   r3   r4   r9   boolrandnr   shaper	   valuesTlenr6   r7   r8   )
r;   maskr<   r,   tree_builderchildrenn_componentsn_leavesparentn_nodesr?   r?   r@   test_structured_linkage_treeO   s&   
rW   c                  C   sf  t jd} | dd}||d fD ]H}t ( tt t|j	dd\}}}}W d    n1 s3w   Y  W d    n1 sBw   Y  d|j
d  d }t|| |ksZJ qt D ]Q}||d fD ]H}t ( tt ||j	dd\}}}}W d    n1 sw   Y  W d    n1 sw   Y  d|j
d  d }t|| |ksJ qgq_d S )Nr   rF   rG   rB   )
n_clustersrH   rI   )r2   r3   r4   rK   r!   r6   warnsUserWarningr   rN   rL   rO   r	   rM   )r;   r<   this_XrR   rV   rT   rU   rQ   r?   r?   r@   test_unstructured_linkage_treef   s2   r\   c            	      C   s   t jd} t jddgtd}| dd}t|j }t	 D ] }||j
|d\}}}}d|jd  d }t|| |ks>J qd S )	Nr   rB   rC   rF   rG   r+   rH   rI   )r2   r3   r4   r9   rJ   rK   r   rL   r	   rM   rN   rO   )	r;   rP   r<   r,   linkage_funcrR   rV   rT   rU   r?   r?   r@   test_height_linkage_tree~   s   
r^   c                  C   sZ   t ddgddgg} d}tjt|d t| dd W d    d S 1 s&w   Y  d S )Nr   rI   z;Cosine affinity cannot be used when X contains zero vectorsmatchr0   r.   )r2   arrayr6   r7   r8   r   )r<   msgr?   r?   r@   test_zero_cosine_linkage_tree   s
   "rc   zn_clusters, distance_threshold)N      ?)rB   Ncompute_distancesTFr(   wardcompleteaveragesinglec                 C   s   t jd}t jddgtd}d}||d}t|j }t| ||||d}	|		| |s0|d urNt
|	ds7J |	jjd }
|
d }|	jj|d fksLJ d S t
|	drUJ d S )	Nr   rB   rC   rG   rF   )rX   r,   r(   distance_thresholdre   
distances_rI   )r2   r3   r4   r9   rJ   rK   r   rL   r   r:   hasattr	children_rl   )rX   re   rk   r(   r;   rP   	n_samplesr<   r,   
clustering
n_childrenrV   r?   r?   r@   'test_agglomerative_clustering_distances   s&   

rr   lil_containerc              
   C   s\  t j| }t jddgtd}d}||d}t|j }dD ]}td||d}|	| z&t
 }	td||	|d}|	| |j}
t t |
dksLJ W t|	 nt|	 w td||d}d|_|	| tt|j|
d	 d |_|	| t t |jdksJ td|| d dd df |d}tt |	| W d    n1 sw   Y  qtd| d
dd}tt |	| W d    n1 sw   Y  t D ]+}tdt ||f|dd}|	| tdd |dd}|	| tt|j|jd	 qtd|dd}|	| t|}td|ddd}|	| t|j|j d S )NrB   rC   rG   rF   rf   rX   r,   r(   )rX   r,   memoryr(   FrI   r1   rg   )rX   r,   metricr(   rh   r-   )r2   r3   r4   r9   rJ   rK   r   rL   r   r:   r   labels_r&   uniqueshutilrmtreecompute_full_treer   r   r,   toarrayr6   r7   r8   r   keysr   r   )global_random_seedrs   r;   rP   ro   r<   r,   r(   rp   tempdirlabelsrv   clustering2X_distr?   r?   r@   test_agglomerative_clustering   s   








r   c                  C   s2   t jd} t| dd}tddd| dS )zhAgglomerativeClustering must work on mem-mapped dataset.

    Non-regression test for issue #19875.
    r   rF   rG   	euclideanrj   rv   r(   N)r2   r3   r4   r    rK   r   r:   )r;   Xmmr?   r?   r@   +test_agglomerative_clustering_memory_mapped  s   r   c                 C   s   t j| }t jddgtd}|dd}t|j }td|d}|	| t 
t |jdks2J ||}|jd dks@J ||}t |d j
dksQJ t||| tt |	|d d  W d    d S 1 ssw   Y  d S )	NrB   rC   rF   rG   r$   rX   r,   rI   r   )r2   r3   r4   r9   rJ   rK   r   rL   r   r:   r&   rx   rw   	transforminverse_transformr   r6   r7   r8   )r~   r;   rP   r<   r,   aggloX_redX_fullr?   r?   r@   test_ward_agglomeration  s   



"r   c                  C   sv   t ddd\} }tddd}||  tt|j|d tdd	dd
\}}tddd}|| tt|j|d d S )Ng?r#   )noiserandom_staterH   rj   )rX   r(   rI   rd   g?)factorr   r   )r   r   r:   r   r   rw   r   )moonsmoon_labelsrp   circlescircle_labelsr?   r?   r@   test_single_linkage_clustering2  s   

r   c                 C   sv   g }| |fD ]&}t |}| d }t||f}d|t||f< |t||j q|d |d k s9J dS )zUtil for comparison with scipyrI   r   N)	rO   maxr2   zerosarangeappenddotrN   all)cut1cut2co_clustcutnkecutr?   r?   r@   assess_same_labellingC  s   r   c                 C   sT  d\}}}t j| }t ||f}t D ]s}tdD ]l}d|j||fd }|dt |d d t j	f  8 }||j
ddd d t j	f 8 }tj||d}	|	d d d d	f jtd
d}
t| ||d\}}}}|jdd t||
d|  t|||}t||
|}t|| qqtt t|d || W d    d S 1 sw   Y  d S )NrB   r$      r$   皙?r%         @rI   axismethodrH   Fcopyr+   z2linkage tree differs from scipy impl for linkage: )r2   r3   r4   r9   r	   r}   ranger5   r   newaxismeanr   r(   astypeintsortr   r   r   r6   r7   r8   )r~   r   pr   r;   r,   r(   ir<   outrn   rR   _rT   r   cut_r?   r?   r@   test_sparse_scikit_vs_scipyO  s4   
 "r   c                 C   s   d\}}}t j| }d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }tj|dd}|d d d d	f 	t
}td |\}}	}
}	|jdd t||d
 t|||
}t|||
}t|| d S )Nr   r   r%   r   rI   r   rj   r   rH   z8linkage tree differs from scipy impl for single linkage.)r2   r3   r4   r5   r   r   r   r   r(   r   r   r	   r   r   r   r   )r~   ro   
n_featuresrX   r;   r<   r   children_scipyrR   r   rT   r   	cut_scipyr?   r?   r@   )test_vector_scikit_single_vs_scipy_singlew  s"   
 r   metric_param_gridc                 C   s   t jjdd}|jdd}t|}| \}}| }tj|  D ]#}t	t
||}tj|fi |}	t||	}
t||	}t j|
| q dS )zoThe MST-LINKAGE-CORE algorithm must work on mem-mapped dataset.

    Non-regression test for issue #19875.
    rI   )seed)   r*   r%   N)r2   r3   r4   r5   r    r}   	itertoolsproductrM   dictzipr   
get_metricr   testingassert_equal)r   r;   r<   r   rv   
param_gridr}   valskwargsdistance_metricmstmst_mmr?   r?   r@   #test_mst_linkage_core_memory_mapped  s   

r   c               	   C   s   t g dg dg dg dg dg dg} t g d}t| ddd}d||j  }t| |d	\}}d
D ]}td||d}||  tt|j	|d q6d S )N)r   r   r   )rI   rI   rI   )rH   rH   rH   )r   r   rI   rI   rH   rH   r   Fn_neighborsinclude_selfrd   r   )rj   ri   ri   rg   )rX   r(   r,   rI   )
r2   ra   r   rN   r
   r   r:   r   r   rw   )r<   true_labelsr,   rS   r(   rp   r?   r?   r@   test_identical_points  s   .
r   c                  C   s8   t g d} t| ddd}td|dd}||  d S )N))y&1?gQ?)r   gMbX?)r   gEԸ?g rh?/$?r   ;On?r   r   r   r   r   r   r   )r   g~jt?)r   gOn?)r   g;On?rB   Fr   r*   rg   rt   )r2   ra   r   r   r:   )r<   r,   rg   r?   r?   r@   test_connectivity_propagation  s   r   c           	      C   s   d\}}t j| }t ||f}tdD ]>}d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }t	|}t	||d}t
|d	 |d	  qd S )
NrB   r$   r$   r   r%   r   rI   r   r+   r   )r2   r3   r4   r9   r   r5   r   r   r   r   r   )	r~   r   r   r;   r,   r   r<   out_unstructuredout_structuredr?   r?   r@   test_ward_tree_children_order  s    r   c              	   C   s8  d\}}t j| }t ||f}tdD ]}d|j||fd }|dt |d d t jf  8 }||jddd d t jf 8 }t	|dd	}t	||dd
}|d }	|d }
t
|	|
 |d }|d }t|| dD ]/}t|||ddd }t||ddd }|d }|d }|d }|d }t|| t|| qiqt ddgddgddgddgddgddgg}t g dg dg dg dg d g}t g dg dg dg d!g d"g}t g dg dg dg d#g d$g}t |\}}t ||f}t	|dd	}t	||dd
}t
|d d d d%f |d  t
|d d d d%f |d  t|d d d%f |d&  t|d d d%f |d&  g d'}||g}t||D ]L\}}t|d|d(}t|||dd}t
|d d d d%f |d  t
|d d d d%f |d  t|d d d%f |d&  t|d d d%f |d&  qMd S ))Nr   r$   r   r%   r   rI   r   T)return_distance)r,   r   r   )ri   rh   rj   )r,   r(   r   )r(   r   gя?geGgw7@g})J@gZ!E@gn]#g!܄@g,8g!Yz @gRա&<agڎF@gT!@)g      @r   g0rq5?       @)      ?g      @gAVJS?r   )g        r   gL/u@r   )      @       @g6SHD4"@r   )      @      "@gwʴG8@r   )r   r   gwfۣ@r   )r   r   g63C2@r   )r   r   go;@r   )r   r   g_ .@r   rH   r*   )rh   ri   rj   )r   r(   )r2   r3   r4   r9   r   r5   r   r   r   r   r   r   r   ra   rL   r   )r~   r   r   r;   r,   r   r<   r   r   children_unstructuredchildren_structureddist_unstructureddist_structuredr(   structured_itemsunstructured_itemsstructured_distunstructured_diststructured_childrenunstructured_childrenlinkage_X_wardlinkage_X_completelinkage_X_averagero   r   connectivity_Xout_X_unstructuredout_X_structuredlinkage_optionsX_linkage_truthX_truthr?   r?   r@   &test_ward_linkage_tree_return_distance  s    





r   c                  C   s   t ddgddgg} t ddgddgg}tdd|d}t|dd}tt ||  W d    d S 1 s9w   Y  d S )	Nr   rI   TFrH   )n_xn_yrP   rg   r,   r(   )r2   ra   r   r   r6   rY   rZ   r:   )xmcwr?   r?   r@    test_connectivity_fixing_non_lil`  s   "r  c            	      C   s   t jd} t | jdddjt jdd}| t|}t	||}t
||D ]\}}|| |ks3J q't jdt jdd d d	 }t dd
d d d	 }t	||}t||t jdt jdddd t||t jdt jdddd d S )Nr   rG   rB   r%   Fr   rF   rC   rH   rd   rI   )rP   n_an_b)r2   r3   r4   rx   randintr   intprandrO   r   r   r   fullr   r9   r   )	r;   r}   rM   dkeyvalue
other_keysother_valuesotherr?   r?   r@   test_int_float_dictm  s    

"r  c                  C   sj   t jd} | dd}t|ddd}t|d}tttdddd}|| || t|j	|j	 d S )	Nr   r   r$   r   Fr   r+   r   )
r2   r3   r4   r  r   r   r   r:   r   rw   )r;   r<   r,   aglc1aglc2r?   r?   r@   test_connectivity_callable~  s   


r  c                  C   sn   t jd} | dd}t|ddd}t|ddd}t|d}t|d}|| || t|j|j d S )	Nr   r   r$   r   Fr   Tr+   )	r2   r3   r4   r  r   r   r:   r   rw   )r;   r<   r,   connectivity_include_selfr  r  r?   r?   r@   "test_connectivity_ignores_diagonal  s   



r  c                  C   s   t jd} | dd}t|ddd}td|d}|| |jd }|jjd }||d ks1J d	}| d
d}t|ddd}t||d}|| |jd }|jjd }||| ks^J d S )Nr   rB   rH   r$   Fr   r   rI   e      )	r2   r3   r4   rK   r   r   r:   rL   rn   )r;   r<   r,   agcro   rV   rX   r?   r?   r@   test_compute_full_tree  s    



r  c                  C   sP   t jd} | dd}t d}t D ]}t|||dd dks%J qd S )Nr   r$   r+   rI   )r2   r3   r4   r  eyer	   rM   r!   )r;   r<   r,   r]   r?   r?   r@   test_n_components  s   
r  c                  C   sr   d} t jd}|| | }t g d}t| | |t jd}G dd d}| }t|||jd |j	dks7J d S )	NrH   r   )TFFT)r   r   rP   	return_asc                   @   s   e Zd Zdd Zdd ZdS )z>test_affinity_passed_to_fix_connectivity.<locals>.FakeAffinityc                 S   s
   d| _ d S )Nr   counter)selfr?   r?   r@   __init__  s   
zGtest_affinity_passed_to_fix_connectivity.<locals>.FakeAffinity.__init__c                 _   s   |  j d7  _ | j S )NrI   r  )r  argsr   r?   r?   r@   	increment  s   zHtest_affinity_passed_to_fix_connectivity.<locals>.FakeAffinity.incrementN)__name__
__module____qualname__r   r"  r?   r?   r?   r@   FakeAffinity  s    r&  )r,   r/   r   )
r2   r3   r4   rK   ra   r   ndarrayr   r"  r  )r&   r;   r<   rP   r,   r&  far?   r?   r@   (test_affinity_passed_to_fix_connectivity  s   r)  )rg   rh   ri   c                 C   s   t j|}t jddgtd}d}||d}t|j }d}d |fD ]I}td ||| d}	|		| |	j
}
tt |	j
}t|  }|||d dd\}}}}}t ||kd }||ks\J t|||d	}t |
|skJ q"d S )
NrB   rC   rG   rF   )rX   rk   r,   r(   T)r,   rX   r   rI   )rX   rR   rT   )r2   r3   r4   r9   rJ   rK   r   rL   r   r:   rw   rO   rx   r	   count_nonzeror   array_equiv)r(   r~   r;   rP   ro   r<   r,   rk   connrp   clusters_producednum_clusters_producedrQ   rR   rS   rT   rU   	distancesnum_clusters_at_thresholdclusters_at_thresholdr?   r?   r@   5test_agglomerative_clustering_with_distance_threshold  s8   

r2  c                 C   sx   t j| }d}|jdd|dfd}td ddd|}t|d	d
d}t |t j t 	|dks3J |j
|ks:J d S )NrB   ii,  r   r%   r   rj   rX   rk   r(   	minkowskirH   rv   r   r   )r2   r3   r4   r  r   r:   r   fill_diagonalinfr   n_clusters_)r~   r;   ro   r<   rp   all_distancesr?   r?   r@   test_small_distance_threshold  s   r:  c                 C   s   t j| }d}|jdd|dfd}d}td |dd|}|j}t|d	d
d}t |t j	 t 
|D ]9}||k}	||	 d d |	f jdd }
||	 d d |	 f jdd }|	 dkrg|
|k sgJ ||ksmJ q4d S )NrG   irB   r   r%   r*   rj   r3  r4  rH   r5  r   r   rI   )r2   r3   r4   r  r   r:   rw   r   r6  r7  rx   minr   sum)r~   r;   ro   r<   rk   rp   r   Dlabelin_cluster_maskmax_in_cluster_distancemin_out_cluster_distancer?   r?   r@   .test_cluster_distances_with_distance_threshold  s,    rB  )	thresholdy_truerd   rI   r   g      ?c                 C   s:   dgdgg}t d || d}||}t||dksJ d S )Nr   rI   r3  )r   fit_predictr   )r(   rC  rD  r<   	clusterery_predr?   r?   r@   ?test_agglomerative_clustering_with_distance_threshold_edge_case*  s   
rH  c                  C   s   dgdgg} t jtdd td d d|  W d    n1 s!w   Y  t jtdd tddd|  W d    n1 sAw   Y  dgdgg} t jtdd td ddd	|  W d    d S 1 siw   Y  d S )
Nr   rI   zExactly one of r_   )rX   rk   rH   z!compute_full_tree must be True ifF)rX   rk   r{   )r6   r7   r8   r   r:   )r<   r?   r?   r@   &test_dist_threshold_invalid_parameters:  s   
"rI  c                  C   s^   t jd} | dd}tjtdd tddd| W d    d S 1 s(w   Y  d S )	Nr   r$   r   z>Distance matrix should be square, got matrix of shape \(5, 3\)r_   r-   rh   r   )	r2   r3   r4   r  r6   r7   r8   r   r:   )r;   r<   r?   r?   r@   *test_invalid_shape_precomputed_dist_matrixI  s   "rJ  c                  C   s
  t g dg dg dg dg dg} t| d dksJ t jd}|dd}t|}td	| d
d}d}tj	t
|d || W d   n1 sNw   Y  t| d
d}tj	t
|d || W d   n1 spw   Y  t|j|j t|j|j dS )zCheck that connecting components works when connectivity and
    affinity are both precomputed and the number of connected components is
    greater than 1. Non-regression test for #16151.
    )r   rI   rI   r   r   )r   r   rI   r   r   )r   r   r   r   r   )r   r   r   r   rI   r   rH   r$   rB   r-   rh   )rv   r,   r(   z.Completing it to avoid stopping the tree earlyr_   Nr   )r2   ra   r   r3   r4   rK   r   r   r6   rY   rZ   r:   r   rw   rn   )connectivity_matrixr;   r<   r   clusterer_precomputedrb   rF  r?   r?   r@   @test_precomputed_connectivity_metric_with_2_connected_componentsU  s6   
rM  )Z__doc__r   ry   	functoolsr   tempfiler   numpyr2   r6   scipy.clusterr   scipy.sparse.csgraphr   sklearn.clusterr   r   r   sklearn.cluster._agglomerativer	   r
   r   r   "sklearn.cluster._hierarchical_fastr   r   r   sklearn.datasetsr   r    sklearn.feature_extraction.imager   sklearn.metricsr   sklearn.metrics.clusterr   r   sklearn.metrics.pairwiser   r   r   r   'sklearn.metrics.tests.test_dist_metricsr   sklearn.neighborsr   sklearn.utils._fast_dictr   sklearn.utils._testingr   r   r   r    r!   sklearn.utils.fixesr"   rA   rW   r\   r^   rc   markparametrizerr   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r)  r2  r:  rB  rH  rI  rJ  rM  r?   r?   r?   r@   <module>   s    	
^
(
v
&"