o
    ?HhL                  	   @   sN  U d dl Z d dlZd dlmZmZ d dlZd dlZd dlm	Z
 d dlmZmZ d dlmZ d dlmZmZmZmZmZ d dlmZmZmZmZmZ d dlmZ egZee ed< egZ ee ed	< ee  Z!egZ"ee ed
< egZ#ee ed< e"e# Z$		dVddZ%dd Z&d\Z'Z(e)e'e( d Z*ej+,dddgddgfddgddgfddgddgfgdd Z-d d! Z.d"d# Z/d$d% Z0d&d' Z1d(d) Z2ej+,d*e!d+d, Z3ej+,d*ed-d. Z4d/d0 Z5d1d2 Z6d3d4 Z7ej+,d5ed6d7 Z8ej+,d5ed8d9 Z9ej+,d5ed:d; Z:ej+,d5ed<d= Z;ej+,d5ed>d? Z<ej+,d5ed@dA Z=ej+,d5edBdC Z>dDdE Z?ej+,d5eej+,dFe$dGdH Z@ej+,d5eej+,dIdJej+,dKdJej+,dFe$ej+,dLdMdNgdOdP ZAej+,dFe$ej+,dQejBejBfejCejCfejDejCfejEejCffdRdS ZFej+,dFe$dTdU ZGdS )W    N)AnyList)DataDimensionalityWarningNotFittedError)euclidean_distances)GaussianRandomProjectionSparseRandomProjection_gaussian_random_matrix_sparse_random_matrixjohnson_lindenstrauss_min_dim)assert_allcloseassert_allclose_dense_sparseassert_almost_equalassert_array_almost_equalassert_array_equal)COO_CONTAINERSall_sparse_random_matrixall_dense_random_matrixall_SparseRandomProjectionall_DenseRandomProjectioncsrc                 C   sX   t j|}| |||j||d|j||dff||fd}|dur(||S | S )zMake some random data with uniformly located non zero entries with
    Gaussian distributed values; `sparse_format` can be `"csr"` (default) or
    `None` (in which case a dense array is returned).
    sizeshapeN)nprandomRandomStaterandnrandintasformattoarray)coo_container	n_samples
n_features
n_nonzerosrandom_statesparse_formatrngdata_coo r*   d/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/sklearn/tests/test_random_projection.pymake_sparse_random_data$   s   

r,   c                 C   s   t | s| S |  S N)spissparser!   )matrixr*   r*   r+   densifyA   s   
r1   )
     g      Y@zn_samples, epsd   n   ?皙?Z   皙?        2   i皙?c                 C   s<   t t t| |d W d    d S 1 sw   Y  d S )Neps)pytestraises
ValueErrorr   )r#   r>   r*   r*   r+   test_invalid_jl_domainQ   s   	"rB   c                   C   sh   t t tddg ddg d W d    n1 sw   Y  ttjjdddd	tdd
d d S )N   r4      r6   r=      r2   )r2   r2   r         ?)r?   r@   rA   r   r   r   r   fullr*   r*   r*   r+   test_input_size_jl_min_dim^   s   
rH   c              	   C   sN   g d}|D ]\}}t t | || W d    n1 sw   Y  qd S )N))r   r   )rE   )rE   rI   )rE   r   )rI   r   r?   r@   rA   random_matrixinputsn_componentsr$   r*   r*   r+   check_input_size_random_matrixj   s   rO   c                 C   s2   g d}|D ]\}}| ||j ||fksJ qd S )N))rE      )rP   rE   )rP   rP   )rE   rE   r   rK   r*   r*   r+   check_size_generatedq   s   
rQ   c                 C   s<   t | dddd}tdt|d tdtj|d d S )Ni'  rE   r   r&   rC         ?)r1   r   r   meanlinalgnorm)rL   Ar*   r*   r+   check_zero_mean_and_unit_normz   s   rX   c              	   C   sN   d\}}dD ]}t t | |||d W d    n1 sw   Y  qd S )N)rP   r2   )g      r:   r7   densityrJ   )rL   rN   r$   rZ   r*   r*   r+   %check_input_with_sparse_random_matrix   s   r[   rL   c                 C   s   t |  t|  t|  d S r-   )rO   rQ   rX   )rL   r*   r*   r+   $test_basic_property_of_random_matrix   s   r\   c                 C   s"   t |  tj| dd}t| d S )NrS   rY   )r[   	functoolspartialrX   )rL   random_matrix_denser*   r*   r+   +test_basic_property_of_sparse_random_matrix   s   r`   c                  C   sF   d} d}t | |dd}tdt|d ttj|ddd|  d d S )	Nr4   r3   r   rR   r:   rD   rE   ddof)r	   r   r   rT   var)rN   r$   rW   r*   r*   r+   test_gaussian_random_matrix   s
   rd   c               	   C   s  d} d}dD ]}d| }t | ||dd}t|}t|}t|t|  |v s+J t| t|  |v s:J |dkrHt|dksGJ nd	|v sNJ t|d
ksWJ tt|d	kdd|  dd tt|t|t|  kdd|  dd tt|t| t|  kdd|  dd ttj|d	kdddd|  d | dd ttj|t|t|  kddddd|   d d|  dd ttj|t| t|  kddddd|   d d|  dd qd S )Nr4   i  )g333333?rS   rE   r   )rZ   r&   rS   rD   r:   rC   )decimalra   )	r
   r1   r   uniquesqrtr   r   rT   rc   )rN   r$   rZ   srW   valuesr*   r*   r+   test_sparse_random_matrix   sD   
 	(*, "rj   c               	   C   sV   d} g dg}t D ]}tt || d| W d    n1 s#w   Y  q	d S )Nauto)r   rE   rD   rN   )all_RandomProjectionr?   r@   rA   fit)rN   fit_dataRandomProjectionr*   r*   r+   0test_random_projection_transformer_invalid_input   s   
rq   r"   c              	   C   s\   t | ttt|d d}tD ]}tt |dd| W d    n1 s&w   Y  qd S )Nr&   r'   rk   rl   )	r,   r#   r$   r%   rm   r?   r@   r   	transform)r"   global_random_seeddatarp   r*   r*   r+    test_try_to_transform_before_fit   s   rv   c              	   C   sj   t | ddd|d d}tD ]&}|ddd}d}tjt|d || W d    n1 s-w   Y  qd S )	Nr3   r4   r#   r$   r%   r&   r'   rk   r9   )rN   r>   z~eps=0.100000 and n_samples=1000 lead to a target dimension of 5920 which is larger than the original space with n_features=100)match)r,   rm   r?   r@   rA   rn   )r"   rt   ru   rp   rpexpected_msgr*   r*   r+   .test_too_many_samples_to_find_a_safe_embedding   s"   	r{   c           
      C   s   t | ddddd d}d}t|dd}| }|d	k}|| }tD ]4}|d
|dd}||}t|dd}| }|| }|| }	|	 d| k sJJ d| |	 k sTJ q d S )N   i  i:  r   rw   r<   T)squaredr:   rk   )rN   r>   r&   rE   )r,   r   ravelrm   fit_transformmaxmin)
r"   ru   r>   original_distancesnon_identicalrp   ry   	projectedprojected_distancesdistances_ratior*   r*   r+   (test_random_projection_embedding_quality  s.   
r   c                 C   s   t | tttdd d}t | tttddd}tD ]E}|dddd}|| t||tj	s/J t||tj	s:J |dddd}||}t||tj	sQJ t
||s[J qd S )Nr   rr   r   r2   T)rN   dense_outputr&   F)r,   r#   r$   r%   r   rn   
isinstancers   r   ndarrayr.   r/   )r"   
dense_datasparse_dataSparseRandomProjry   r*   r*   r+   +test_SparseRandomProj_output_representation7  s4   

r   c           
   	   C   s  t | ttt|d d}tD ]}|dddd|}|jdksJ |jdks&J |tv r8|j	dks1J t
|jdd |jjdtfksBJ ||}|jtdfksPJ ||}t|| |ddd	}||}t|| tt ||d d d
df  W d    n1 sw   Y  |tv r|dddd}||}	|	jtdfksJ |jjdtfksJ |jjdk sJ d|jjk sJ qd S )Nrr   rk   r   rF   )rN   r&   r>   r5   gQ?rD   )r&   r>   rE   rP   r4   gMbP?)rN   rZ   r&   s   U   )r,   r#   r$   r%   rm   rn   rN   n_components_r   rZ   r   density_components_r   rs   r   r   r?   r@   rA   nnz)
r"   rt   ru   rp   ry   projected_1projected_2rp2projected_3r   r*   r*   r+   2test_correct_RandomProjection_dimensions_embedding[  sF   





r   c              	   C   st   d}d}t |d }t| ||||d d}tD ]!}tt ||d d| W d    n1 s2w   Y  qd S )N   rP      rr   rE   rl   )intr,   rm   r?   warnsr   rn   )r"   rt   r$   r#   r%   ru   rp   r*   r*   r+   1test_warning_n_components_greater_than_n_features  s"   	r   c           
      C   s   d}d}t |d }t| ||||d d}t| ||||dd}tD ]}|ddd|}|ddd|}	tt|jt|	j q d S )	Nr   rP   r   rr   r   rC   rE   )rN   r&   )r   r,   rm   rn   r   r1   r   )
r"   rt   r$   r#   r%   r   r   rp   rp_dense	rp_sparser*   r*   r+   test_works_with_sparse_data  s4   	r   c                   C   s   t ddddks
J dS )zyTest Johnson-Lindenstrauss for small eps.

    Regression test for #17111: before #19374, 32-bit systems would fail.
    r4   h㈵>r=   l   JWN)r   r*   r*   r*   r+   "test_johnson_lindenstrauss_min_dim  s   r   random_projection_clsc                    sj   t | ttt|d d}|dd}|| | }|j  tj	 fddt
|jD td}t|| d S )Nrr   rD   rl   c                    s   g | ]}  | qS r*   r*   ).0iclass_name_lowerr*   r+   
<listcomp>  s    z<test_random_projection_feature_names_out.<locals>.<listcomp>)dtype)r,   r#   r$   r%   rn   get_feature_names_out__name__lowerr   arrayranger   objectr   )r"   r   rt   ru   random_projection	names_outexpected_names_outr*   r   r+   (test_random_projection_feature_names_out  s"   


r   r#   )rD   	   r2      r3   r$   compute_inverse_componentsTFc              	   C   s  d}||||d}t | |||| d d |d d}t | |||| d d |dd}	||	fD ][}
t  tjddtd	 ||
}W d    n1 sKw   Y  |ret|d
sYJ |j}|j||fkseJ |	|}|j|
jksrJ |
|}t|dr| }t||ddd q-d S )Nr2   )rN   r   r&   r4   rE   )r%   r&   r'   r   ignorez>The number of components is higher than the number of features)messagecategoryinverse_components_r!   gHz>g|=)rtolatol)r,   warningscatch_warningsfilterwarningsr   r   hasattrr   r   inverse_transformrs   r!   r   )r"   r#   r$   r   r   rt   rN   r   X_denseX_csrXr   inv_componentsprojected_backprojected_againr*   r*   r+   test_inverse_transform  sT   	




r   zinput_dtype, expected_dtypec                 C   sT   t jd}|dd}| dd}|||}|jj|ks!J |j|ks(J d S )N*        r   rR   )r   r   r   randr   astyper   r   )r   input_dtypeexpected_dtyper(   r   ry   transformedr*   r*   r+   "test_random_projection_dtype_match#  s   
r   c                 C   st   d}t jd}|dd}| dd}| dd}||t j}||t j}t|||d t	|j
|j
 d S )Nr   r   r   r   r   rR   )r   )r   r   r   r   r   r   float32float64r   r   r   )r   r   r(   r   rp_32rp_64projection_32projection_64r*   r*   r+   ,test_random_projection_numerical_consistency:  s   

r   )Nr   )Hr]   r   typingr   r   numpyr   r?   scipy.sparsesparser.   sklearn.exceptionsr   r   sklearn.metricsr   sklearn.random_projectionr   r   r	   r
   r   sklearn.utils._testingr   r   r   r   r   sklearn.utils.fixesr   r   __annotations__r   all_random_matrixr   r   rm   r,   r1   r#   r$   r   r%   markparametrizerB   rH   rO   rQ   rX   r[   r\   r`   rd   rj   rq   rv   r{   r   r   r   r   r   r   r   r   r   r   int32int64r   r   r*   r*   r*   r+   <module>   s   
 

	


9


%
#
4

:



	