o
    `^hC                     @   sJ  d dl Z d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZ g dg dg dg dgZejd	d
g dg dg dg dgdfdg dg dg dg dgdfdg dg dg dg dgdfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfdg dg dg dg dgg dfgdd Zdd Zejdd
gdd Zdd Zejd	d
g dg dg dg d gdfdg dg dg dg d gdfdg dg dg d g d gdfdg dg dg d g d gg d!fdg dg dg dg dgg d"fdg dg dg dg d gg dfgd#d$ Zejd%d&d' Zejdddgd(d) Zejdg d*d+d, Zd-d. Zejd/ed0d1d2d3 Zd4d5 Z ejd6d
g d7g d8g d9fdg d7g d:g d;fdg d<g d:g d=fgd>d? Z!ejd@d
g dAg dBg dCg dDgfdg dEg dEg dFg dGgfdg dHg dIg dJg dJgfgejdKg dLdMdN Z"ejdg d*dOdP Z#dQdR Z$ejdSdg dTfdg dUfgdVdW Z%dXdY Z&ejdZej'ej(ej)gejd[dej(ej)gejdKg dLd\d] Z*ejd^ej'ej(ej)gejdKg dLd_d` Z+dadb Z,ejdcdddedf edgD fdhdidf edgD fdjdkdf edgD fgdldm Z-ejdg d*dndo Z.dpdq Z/dS )r    N)clone)KBinsDiscretizerOneHotEncoder)assert_allcloseassert_allclose_dense_sparseassert_array_almost_equalassert_array_equal      ?)r         @      )r   g      @r
         ?)   g      @r      z!strategy, expected, sample_weightuniform)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   kmeans)r   r   r   r   quantile)r   r   r   r   )r   r   r   r   )r   r      r   c                 C   s0   t dd| d}|jt|d t||t d S )Nr   ordinaln_binsencodestrategysample_weight)r   fitXr   	transform)r   expectedr   est r$   m/home/air/shanriGPT/back/venv/lib/python3.10/site-packages/sklearn/preprocessing/tests/test_discretization.pytest_fit_transform   s   "r&   c                   C   sR   t ddt t tdgd dt t ddtjjttks'J d S )Nr   r   r   )	r   fit_transformr    nparrayr   n_bins_dtypeintr$   r$   r$   r%   test_valid_n_bins:   s   &r.   r   c                 C   sb   t jttd}td| d}d}tjt|d |jt|d W d   dS 1 s*w   Y  dS )z=Check that we raise an error when the wrong strategy is used.)shaper   )r   r   zK`sample_weight` was provided but it cannot be used with strategy='uniform'.matchr   N)	r)   oneslenr    r   pytestraises
ValueErrorr   )r   r   r#   err_msgr$   r$   r%   1test_kbinsdiscretizer_wrong_strategy_with_weights@   s   "r8   c                  C   sB  t dd} t| d}d}tjt|d |t W d    n1 s$w   Y  g d} t| d}d}tjt|d |t W d    n1 sKw   Y  g d} t| d}d}tjt|d |t W d    n1 srw   Y  g d	} t| d}d
}tjt|d |t W d    d S 1 sw   Y  d S )N)r             @r'   z:n_bins must be a scalar or array of shape \(n_features,\).r0   )r   r   r   r   r   r   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 3. Number of bins must be at least 2, and must be an int.) @r   r<   r   z{KBinsDiscretizer received an invalid number of bins at indices 0, 2. Number of bins must be at least 2, and must be an int.)r)   fullr   r4   r5   r6   r(   r    )r   r#   r7   r$   r$   r%   test_invalid_n_bins_arrayL   s4   



"r>   )r   r   r   r   r;   )r   r   r   r   )r   r   r   r   )r   r   r   r   c                 C   s~   t g dd| djt|d}t||t ttjd }|jj|fks'J t	|j|j
D ]\}}|j|d fks<J q.d S )Nr   r   r   r   r   r   r   r   )r   r   r    r   r!   r)   r*   r/   
bin_edges_zipr+   )r   r"   r   r#   
n_features	bin_edgesr   r$   r$   r%   test_fit_transform_n_bins_arrayr   s   %
rD   z&ignore: Bins whose width are too smallc                  C   s   t dgdgdgdgdgdgg} tddd	d
}|j| g dd t|jd g d t|| dgdgdgdgdgdgg dS )z;Check the impact of `sample_weight` one computed quantiles.r
   r   r   r   i  i  
   r   r   r   )r   r   r   r   r   r   r   r   )r
   r   r   r           g      ?r:   N)r)   r*   r   r   r   r@   r!   r    r#   r$   r$   r%   *test_kbinsdiscretizer_effect_sample_weight   s
   ",rH   c                 C   sH   t dd| d}tjg dtjd}t|}|jt|d t|| dS )z7Make sure that `sample_weight` is not changed in place.r   r   r   )r   r   r   r   r,   r   N)r   r)   r*   float64copyr   r    r   )r   r#   r   sample_weight_copyr$   r$   r%   /test_kbinsdiscretizer_no_mutating_sample_weight   s
   
rM   )r   r   r   c                 C   s   t d tddgddgddgddgg}t| ddd}d	}tjt|d
 || W d    n1 s6w   Y  |j	d dksDJ |
|}t|d d df t|jd  d S )Nalwaysr   r
   r   r   r   r   )r   r   r   z2Feature 0 is constant and will be replaced with 0.r0   )warningssimplefilterr)   r*   r   r4   warnsUserWarningr   r+   r!   r   zerosr/   )r   r    r#   warning_messageXtr$   r$   r%   test_same_min_max   s   
"
&rV   c                  C   s   t d} tdd}tt ||  W d    n1 sw   Y  tdd}|| dd tt ||  W d    d S 1 sHw   Y  d S )Nr9   r   r'   r   r   )	r)   aranger   r4   r5   r6   r   reshaper!   rG   r$   r$   r%   test_transform_1d_behavior   s   


"rY   ir   	   c                 C   sX   t g ddd}t g ddd}|d|   }tddd|}t|| d S )	N)r:         @g      @g       @g      $@r   r   )r   r   r   r   r   rE   r   r   r   r   )r)   r*   rX   r   r(   r   )rZ   X_initXt_expectedr    rU   r$   r$   r%   test_numeric_stability   s
   r`   c                  C   s   t g dddt} | t}t g dddt} | t}t|r'J ttdd dD dd|| t g dd	dt} | t}t|sOJ ttd
d dD dd|	 |	  d S )Nr?   r   r]   onehot-densec                 S      g | ]}t |qS r$   r)   rW   .0rZ   r$   r$   r%   
<listcomp>       z'test_encode_options.<locals>.<listcomp>F)
categoriessparse_outputonehotc                 S   rb   r$   rc   rd   r$   r$   r%   rf      rg   T)
r   r   r    r!   spissparser   r   r(   toarray)r#   Xt_1Xt_2Xt_3r$   r$   r%   test_encode_options   s.   


rq   z8strategy, expected_2bins, expected_3bins, expected_5bins)r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r9   r9   )r   r   r   r   r   r   )r   r   r   r   r   r9   )r   r   r   r   r   r   )r   r   r   r   r9   r9   c                 C   s   t g ddd}td| dd}||}t||  td| dd}||}t||  td| dd}||}t||  d S )	N)r   r   r   r   r[   rE   r   r   r   r   r   r   r   r      )r)   r*   rX   r   r(   r   ravel)r   expected_2binsexpected_3binsexpected_5binsr    r#   rU   r$   r$   r%   test_nonuniform_strategies   s   


rx   zstrategy, expected_inv)      r:         r   )r         @      r   )r   r\   ry   r   )r   r\   ry   r   )g      g      @g      g      )g      g      @g      g      ?)g      ?g      @g      g      ?)ry   r:   rz   g      )r   r{   r|   rF   )r   r\   ry   g      ?r   )r   rj   ra   c                 C   s0   t d| |d}|t}||}t|| d S )Nr   rr   )r   r(   r    inverse_transformr   )r   r   expected_invkbdrU   Xinvr$   r$   r%   test_inverse_transform  s   "

r   c                 C   s   t g dd d d f }td| dd}|| t ddgd d d f }||}t|jddd	 |j t|jdddg d S )
Nr   r   r   r   r9   r   rr   r
   rs   r   )axisr   )	r)   r*   r   r   r!   r   maxr+   min)r   r    r   X2X2tr$   r$   r%    test_transform_outside_fit_range<  s   

r   c                  C   s   t g dd d d f } |  }tddd}|| }t| | | }||}t|| t|t dgdgdgdgg d S )Nr   r   r   r]   r   r   r   )r)   r*   rK   r   r(   r   r}   )r    X_beforer#   rU   	Xt_beforer   r$   r$   r%   test_overwriteH  s   



$r   zstrategy, expected_bin_edges)r   r   r   )r   r   r   c                 C   sz   dgdgdgdgdgdgg}t d| d d}d}tjt|d || W d    n1 s.w   Y  t|jd | d S )Nr   r   )r   r   	subsample'Consider decreasing the number of bins.r0   )r   r4   rQ   rR   r   r   r@   )r   expected_bin_edgesr    r   rT   r$   r$   r%   test_redundant_binsV  s   r   c                  C   s   t g ddd} t g d}t g ddd}tdddd	}d
}tjt|d ||  W d    n1 s=w   Y  t|j	d | t|
| | d S )N)皙?r   ffffff?r   r   )r   gq=
ףp?g=
ףp=?gzG?gp=
ף?r   )r   r   r9   rE   r   r   r   r   r0   r   )r)   r*   rX   r   r4   rQ   rR   r   r   r@   r!   )r    rC   rU   r   rT   r$   r$   r%   !test_percentile_numeric_stabilityb  s   r   in_dtype	out_dtypec                 C   sr   t jt| d}td||d}|| |d ur|}n|d u r(|jt jkr(t j}n|j}||}|j|ks7J d S NrI   r   )r   r   r,   )	r)   r*   r    r   r   r,   float16rJ   r!   )r   r   r   X_inputr   expected_dtyperU   r$   r$   r%   test_consistent_dtypeo  s   

r   input_dtypec                 C   sd   t jt| d}td|t jd}|| ||}td|t jd}|| ||}t|| d S r   )	r)   r*   r    r   float32r   r!   rJ   r   )r   r   r   kbd_32Xt_32kbd_64Xt_64r$   r$   r%   test_32_equal_64  s   



r   c                  C   s   t g ddd} tdddd}||  t|}|jd d ||  t|jd	 |jd	 D ]\}}t j	
|| q1|jj|jjksGJ d S )
Nr	   r   r   rE   r   r   r   r   r   )r)   r*   rX   r   r   r   
set_paramsrA   r@   testingr   r/   )r    kbd_defaultkbd_without_subsamplingbin_kbd_defaultbin_kbd_with_subsamplingr$   r$   r%   'test_kbinsdiscretizer_subsample_default  s   

r   zencode, expected_namesrj   c                 C   .   g | ]}t d D ]}d| dt| qqS r9   feat_rangefloatre   col_idbin_idr$   r$   r%   rf         rf   r   ra   c                 C   r   r   r   r   r$   r$   r%   rf     r   r   c                 C      g | ]}d | qS r   r$   )re   r   r$   r$   r%   rf     rg   c                 C   sz   g dg dg dg dg}t d| d|}||}dd td	D }||}|jd
 |jd ks6J t|| dS )z[Check get_feature_names_out for different settings.
    Non-regression test for #22731
    )r
   r   r   )r   r   r   )r   r   r
   )r   r9   r   r9   r]   c                 S   r   r   r$   rd   r$   r$   r%   rf     rg   z>test_kbinsdiscrtizer_get_feature_names_out.<locals>.<listcomp>r   r   r   N)r   r   r!   r   get_feature_names_outr/   r   )r   expected_namesr    r   rU   input_featuresoutput_namesr$   r$   r%   *test_kbinsdiscrtizer_get_feature_names_out  s   

r   c                 C   sj   t j|dd }t| d|d}|| t|}|jd d || t|j	d |j	d dd d S )	N)i r   r   iP  )r   r   random_stater   r   g{Gz?)rtol)
r)   randomRandomStaterandom_sampler   r   r   r   r   r@   )r   global_random_seedr    kbd_subsamplingkbd_no_subsamplingr$   r$   r%   test_kbinsdiscretizer_subsample  s   


r   c                  C   s  t dd d d f } t }|| } tjtdd |  W d    n1 s)w   Y  tjtdd |j| | d W d    n1 sGw   Y  tj	dd t
d ||  W d    n1 sgw   Y  tjtd	d |j| d
 W d    d S 1 sw   Y  d S )NrE   z$Missing required positional argumentr0   z$Cannot use both X and Xt. Use X only)r    rU   T)recorderrorzXt was renamed X in version 1.5)rU   )r)   rW   r   r(   r4   r5   	TypeErrorr}   rO   catch_warningsrP   rQ   FutureWarning)r    r   r$   r$   r%   )test_KBD_inverse_transform_Xt_deprecation  s    


"r   )0rO   numpyr)   r4   scipy.sparsesparserk   sklearnr   sklearn.preprocessingr   r   sklearn.utils._testingr   r   r   r   r    markparametrizer&   r.   r8   r>   rD   filterwarningsrH   rM   rV   rY   r   r`   rq   rx   r   r   r   r   r   r   r   rJ   r   r   r   r   r   r   r$   r$   r$   r%   <module>   s      
!
&   
$


	





 

		

