o
    ?Hh(                    @   s  d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZ d dlmZmZm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2m3Z3 dd Z4dd Z5dd Z6ej78dg dej78de2dd Z9ej78dg ddd Z:ej78dg dd d! Z;ej78dg dej78d"e1d#d$ Z<d%d& Z=d'd( Z>ej78d"e1d)d* Z?ej78d"e1d+d, Z@ej78dd-d.gej78d/deAeBgd0d1 ZCej78dd-d.gej78d2d3d4gd5d6 ZDej78dd7d8gej78d/eBeEd9eEd:gd;d< ZFej78d"e1d=d> ZGej78d?dejHd@dAd gdBdC ZIej78d/eAdDgdEdF ZJej78dGdHdIejHfgdJdK ZKdLdM ZLej78dNe2ejMg dOdP ZNej78d?dejHd@dAd gdQdR ZOej78d/eAdDgdSdT ZPej78dUdVgdWggdVgejHgggdXdY ZQdZd[ ZRd\d] ZSd^d_ ZTd`da ZUdbdc ZVej78ddg dedfdg ZWej78dhde e e e  gdidj ZXdkdl ZYdmdn ZZdodp Z[ej78dg ddqdr Z\dsdt Z]dudv Z^dwdx Z_ej78dydzd{gd|d} Z`d~d Zadd Zbdd Zcej7j8dd dedd gdz dgdz gfddedeje gdz ejegdz gfeje ejeedeje gdz ejegdz gfg dg dedg dg dgfdeje dgddejegeddeje dgddejeggfgg dddd Zfej78ddejeeje dfdd{gg ddfg dddgdfgdd Zgej7j8dddgeje ejegfddgdgd dgd gfgddgddd Zhej78dddgdd Ziej78dddVejjjkdVdgej78dddVejjjkdVdgdd Zlej78dedddVgdVdWggedddVgdVdggddddfejdddgddggeBdejdddgddggeBdi dfgdd Zmdd Znej78den ej78dddzedg dfddzedg dfgdd Zoej78de1e2 e0 e3 e/ dd Zpej78dg dej78dejdd fgeqee1e2 e0 e3 e/ ejHg ddĄ ZrddƄ Zsej78dejdddgddggeAddejdg dȢg dɢgeAdfedejHdIgdIejHggejHedg dʢg dˢgfejdejHdgdejHggeAdejHejdg dȢg dɢgeAdfejdddgddggeAddejdg dȢg dɢgeAdfgdd̈́ Ztej78deegej78ddejHdfdgddԄ Zuddք Zvej78de2dd؄ Zwej78deegddڄ Zxej78de1e2 e0 e3 e/ dd܄ Zyej78dddgdd Zzej78ddg dfdg dfgdd Z{ej78ddejHgdd Z|ej78ddejHgdd Z}ej78ddg deAddWfdg deAddVfdddgeAddWfdg deAddWfdg de~ddWfdVg de~ddVfdg de~ddWfdVg de~ddWfgdd Zej78dg ddd Zd d Zdd Zej78dddgdd Zdd Zd	d
 Zdd Zej78dejejgdd Zej78dddgej78dddgdd Zej78dddgej78dg dej78dddgdd Zej78d"e1dd Zdd Zej78dg ddd Zej78dg dej78dedg dg dgedejHdWdzdgejHd d!d"ggeddVdWdzejHgd{d d!ejHgggd#d$ ZdS (%      N)productsparse)kstest)tree)load_diabetes)DummyRegressor)ConvergenceWarning)enable_iterative_imputer)IterativeImputer
KNNImputerMissingIndicatorSimpleImputer)_most_frequent)ARDRegressionBayesianRidgeRidgeCV)GridSearchCV)Pipeline
make_union)_sparse_random_matrix)_convert_containerassert_allcloseassert_allclose_dense_sparseassert_array_almost_equalassert_array_equal)BSR_CONTAINERSCOO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERSLIL_CONTAINERSc                 C      t | | | j|jksJ d S N)r   dtypexy r'   `/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/sklearn/impute/tests/test_impute.py"_assert_array_equal_and_same_dtype(      
r)   c                 C   r!   r"   )r   r#   r$   r'   r'   r(   _assert_allclose_and_same_dtype-   r*   r+   c           
      C   s   d||f }t }| jjdks|jjdkrt}t||d}|| |  }	||j||	dd ||	||	dd t||d}|||  |||  }	t
|	rZ|	 }	||j||	dd ||	||	dd dS )zUtility function for testing imputation for a given strategy.

    Test with dense and sparse arrays

    Check that:
        - the statistics (mean, median, mode) are correct
        - the missing values are imputed correctlyz<Parameters: strategy = %s, missing_values = %s, sparse = {0}fmissing_valuesstrategyF)err_msgTN)r   r#   kindr   r   fit	transformcopystatistics_formatr   issparsetoarray)
XX_truer/   
statisticsr.   sparse_containerr0   	assert_aeimputerX_transr'   r'   r(   _check_statistics2   s$   
r@   r/   )meanmedianmost_frequentconstantcsr_containerc                 C   s   t jdd}t j|d d d< t| d}|||}|jdks"J ||}|jdks.J t| d}||}|jdks?J d S )N
      r/   )rF   rG   )initial_strategy)nprandomrandnnanr   fit_transformshaper   )r/   rE   r9   r>   	X_imputediterative_imputerr'   r'   r(   test_imputation_shapeY   s   



rR   rA   rB   rC   c                 C   sj   t d}t j|d d df< t| d|}tjtdd || W d    d S 1 s.w   Y  d S )N      r   rH   Skippingmatch)	rJ   onesrM   r   r2   pytestwarnsUserWarningr3   )r/   r9   r>   r'   r'   r(    test_imputation_deletion_warningk   s   
"r^   c                 C   s   t d}tj}tjg dtd}|j||d|gd|ddgg|d}t| d	|}t	|j
| t jtd
d || W d    d S 1 sHw   Y  d S )Npandasabcdr#         rG   rF   columnsrH   z6Skipping features without any observed values: \['b'\]rX   )r[   importorskiprJ   rM   arrayobject	DataFramer   r2   r   feature_names_in_r\   r]   r3   )r/   pdr.   feature_namesr9   r>   r'   r'   r(   .test_imputation_deletion_warning_feature_namesu   s    


"rq   csc_containerc                 C   s   t d}d|d< ||}t| dd}tjtdd || W d    n1 s*w   Y  ||  tjtdd || W d    d S 1 sNw   Y  d S )NrT   r   )r/   r.   zProvide a dense arrayrX   )	rJ   rZ   r   r[   raises
ValueErrorr2   r8   r3   )r/   rr   r9   r>   r'   r'   r(   test_imputation_error_sparse_0   s   
"ru   c                 O   >   t | dr| jnt| }|dkrtjS tj| g|R i |S Nsizer   )hasattrrx   lenrJ   rM   rB   arrargskwargslengthr'   r'   r(   safe_median      &r   c                 O   rv   rw   )ry   rx   rz   rJ   rM   rA   r{   r'   r'   r(   	safe_mean   r   r   c              
   C   sr  t jd}d}d}|| || f}t |d }t d|d d }|dd d  |dd d< dt jdd fd	t jd
d fg}|D ]\}}	}
t |}t |}t |d }t|d D ]}|| d dk|| d  || d  }t|d ||  || ||   d}|d | | }|d | }t 	|	|}||
t|d |  }|
|||||< t |||f|d d |f< d|	krt |t 	|| || f|d d |f< nt ||t 	|| |f|d d |f< t j||d d |f  t j||d d |f  q^|d	krt |jdd }n
t |jdd }|d d |f }t|||||	|  qBd S )Nr   rF   rf   rg   rG   rA   c                 S      t t| |fS r"   )r   rJ   hstackzvpr'   r'   r(   <lambda>       z-test_imputation_mean_median.<locals>.<lambda>rB   c                 S   r   r"   )r   rJ   r   r   r'   r'   r(   r      r   axis)rJ   rK   RandomStatezerosarangerM   emptyrangemaxrepeatpermutationrz   r   shuffleisnananyallr@   )rr   rngdimdecrO   r   valuestestsr/   test_missing_valuestrue_value_funr9   r:   true_statisticsjnb_zerosnb_missing_values	nb_valuesr   r   r   cols_to_keepr'   r'   r(   test_imputation_mean_median   sP   

(& 
r   c                 C   s   t dt jt jgdt jt jgddt jgddt jgddt jgddt jgddt jgddt jgg }t g dg d	g dg d
g dg dg dg dg }g d}t||d|t j|  d S )Nr   rV   rg   rG   r   r   r   )rV   rV   rV   )r   r         )r   rV         @)rg   rV         @)r   r         )r   rG         ?)r   rV   r   r   r   r   r   r   rB   )rJ   rk   rM   	transposer@   )rr   r9   X_imputed_medianstatistics_medianr'   r'   r(   $test_imputation_median_special_cases   s<   





r   rA   rB   r#   c                 C   sl   t jg dg dg dg|d}d}tjt|d t| d}|| W d    d S 1 s/w   Y  d S )Nra   rb   rU   rg   e   gh	   re   4non-numeric data:
could not convert string to float:rX   rH   )rJ   rk   r[   rs   rt   r   rN   )r/   r#   r9   msgr>   r'   r'   r(   .test_imputation_mean_median_error_invalid_type  s    
"r   typelist	dataframec                 C   s~   g dg dg dg}|dkrt d}||}d}t jt|d t| d}|| W d    d S 1 s8w   Y  d S )	Nr   r   r   r   r_   r   rX   rH   )r[   rj   rm   rs   rt   r   rN   )r/   r   r9   ro   r   r>   r'   r'   r(   :test_imputation_mean_median_error_invalid_type_list_pandas  s   


"r   rD   rC   USc                 C   s   t jt jt jddgt jdt jdgt jddt jgt jdddgg|d}d}tjt|d	 t| d
}||| W d    d S 1 sDw   Y  d S )Nra   r,   rc   rd   rb   r   re   z#SimpleImputer does not support datarX   rH   )	rJ   rk   rM   r[   rs   rt   r   r2   r3   )r/   r#   r9   r0   r>   r'   r'   r(   /test_imputation_const_mostf_error_invalid_types,  s   

"r   c                 C   sd   t g dg dg dg dg}t g dg dg dg dg}t||d	t jd
ddgd|  d S )N)r   r   r   rV   )r   rG   r   rU   )r   rf   rU   r   )r   rG   rU      )rG   r   rV   )rG   rU   rU   )rf   rU   rU   )rG   rU   r   rC   rG   rU   r   )rJ   rk   r@   rM   )rr   r9   r:   r'   r'   r(   test_imputation_most_frequentA  s   	 r   markerNAN c                 C   s   t j| | ddg| d| dg| dd| g| dddggtd}t jg dg d	g d
g dgtd}t| dd}|||}t|| d S )Nra   r,   rc   rd   rb   r   re   )rc   ra   r,   )rc   rd   rd   )rb   rd   rd   )rc   rd   r   rC   r-   )rJ   rk   rl   r   r2   r3   r   r   r9   r:   r>   r?   r'   r'   r(   %test_imputation_most_frequent_objects]  s&   





r   categoryc                 C   sj   t d}td}|j|| d}tjg dg dg dg dgtd}tdd	}|	|}t
|| d S )
Nr_   ,Cat1,Cat2,Cat3,Cat4
,i,x,
a,,y,
a,j,,
b,j,x,re   )ra   ir%   )ra   r   r&   )ra   r   r%   )rb   r   r%   rC   rH   r[   rj   ioStringIOread_csvrJ   rk   rl   r   rN   r   r#   ro   r,   dfr:   r>   r?   r'   r'   r(   $test_imputation_most_frequent_pandasz  s   



r   zX_data, missing_value)rf   r         ?c                 C   s   t jd| td}||d< d}d|dt|d}tjtt|d t	|d	|d
}|
| W d    d S 1 s;w   Y  d S )NrT   re   r   r   r%   fill_value=
 (of type ) cannot be castrX   rD   r.   r/   
fill_value)rJ   fullfloatr   r[   rs   rt   reescaper   rN   )X_datamissing_valuer9   r   r0   r>   r'   r'   r(   +test_imputation_constant_error_invalid_type  s   "r   c                  C   sl   t g dg dg dg dg} t g dg dg dg dg}td	d
ddd}|| }t|| d S )Nr   rG   rU   r   rg   r   rV   r   r   r   r   r      r   r   r   )r   rG   rU   r   )rg   r   rV   r   )r   r   r   r   )r   r   r   r   r   rD   r   Tr.   r/   r   keep_empty_features)rJ   rk   r   rN   r   )r9   r:   r>   r?   r'   r'   r(    test_imputation_constant_integer  s   ""
r   array_constructorc              	   C   s   t t jddt jgdt jdt jgddt jt jgdddt jgg}t g dg dg d	g d
g}| |}| |}tdddd}||}t|| d S )N皙?r   333333??ffffff?      ?)r   r   r   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   rD   r   Tr/   r   r   )rJ   rk   rM   r   rN   r   )r   r9   r:   r>   r?   r'   r'   r(   test_imputation_constant_float  s"   	
r   c                 C   s   t j| dd| gd| d| gdd| | gddd	| ggtd
}t jg dg dg dg dgtd
}t| dddd}||}t|| d S )Nra   rb   rc   rd   r   r,   r   r   r   re   )missingra   rb   r   )rc   r   rd   r   )r   r,   r   r   )r   r   r   r   rD   r   Tr   )rJ   rk   rl   r   rN   r   r   r'   r'   r(   test_imputation_constant_object  s0   






r   c                 C   sl   t d}td}|j|| d}tjg dg dg dg dgtd}tdd	d
}|	|}t
|| d S )Nr_   r   re   )r   r   r%   r   )ra   r   r&   r   )ra   r   r   r   )rb   r   r%   r   rD   Tr/   r   r   r   r'   r'   r(   test_imputation_constant_pandas  s   



r   r9   rf   rG   c                 C   sf   t  | }|jdksJ t  }|dgdgg |jdks J |dgtjgg |jdks1J d S )Nr   rf   rG   )r   r2   n_iter_rJ   rM   r9   r>   r'   r'   r(   "test_iterative_imputer_one_feature
  s   r   c                  C   sr   t dddd} | jd }tdt|dfdtjddfg}d	g d
i}t dddd }t||}|| | d S )Nd   皙?)densityr   r>   r.   r   random_stateimputer__strategyrS   rf   )	r   datar   r   r   DecisionTreeRegressorr8   r   r2   )r9   r.   pipeline
parametersYgsr'   r'   r(   $test_imputation_pipeline_grid_search  s   

r	  c                  C   st  t ddddd} |   }tdddd}|||}d|d	< t||kr*J |  }t|jd ddd}|||}d|jd< t|j|jkrPJ |   }tddd
d}|||}d|d	< t	|| |  
 }t|jd dd
d}|||}d|jd< t	|j|j |  }t|jd dd
d}|||}d|jd< t|j|jkrJ d S )NrV   g      ?r   r   r  rA   T)r.   r/   r4   r   r   F)r   r4   r8   r   r2   r3   rJ   r   r  r   tocsc)X_origr9   r>   Xtr'   r'   r(   test_imputation_copy)  s4   



r  c                  C   s   t jd} d}d}t||d| d }|dk}t j||< tdd}||}t||j	
| tdd|}t |
||j	
|krHJ d|_t|
||j	
| d S )Nr   r   rF   r   r
  )max_iterrV   )rJ   rK   r   r   r8   rM   r   rN   r   initial_imputer_r3   r2   r   r   )r   nrd   r9   missing_flagr>   rP   r'   r'   r(   !test_iterative_imputer_zero_itersT  s   


 r  c                  C   sp   t jd} d}d}t||d| d }tdddd}|| || tdddd}|| || d S )	Nr   r   rU   r   r
  rf   )r.   r  verboserG   )rJ   rK   r   r   r8   r   r2   r3   )r   r  rd   r9   r>   r'   r'   r(   test_iterative_imputer_verbosel  s   


r  c                  C   sB   d} d}t | |f}tddd}||}t||j| d S )Nr   rU   r   rf   )r.   r  )rJ   r   r   rN   r   r  r3   )r  rd   r9   r>   rP   r'   r'   r(   "test_iterative_imputer_all_missingz  s   
r  imputation_order)rK   roman	ascending
descendingarabicc           
      C   sT  t jd}d}d}d}t||d|d }d|d d df< td|dd	d
ddd| |d
}|| dd |jD }t||j	 |j
ksEJ | dkr^t |d |d  t d|ks\J d S | dkrzt |d |d  t |d ddksxJ d S | dkr|d |d  }||d d  }	||	ksJ d S d| v rt|||d  ksJ d S d S )Nr   r   rF   rG   r   r
  rf   rV   FT)
r.   r  n_nearest_featuressample_posteriorskip_complete	min_value	max_valuer  r  r  c                 S      g | ]}|j qS r'   feat_idx).0r   r'   r'   r(   
<listcomp>  r   z;test_iterative_imputer_imputation_order.<locals>.<listcomp>r  r  r   rK   ending)rJ   rK   r   r   r8   r   rN   imputation_sequence_rz   r   n_features_with_missing_r   r   )
r  r   r  rd   r  r9   r>   ordered_idxordered_idx_round_1ordered_idx_round_2r'   r'   r(   'test_iterative_imputer_imputation_order  sB   
*0r,  	estimatorc           	      C   s   t jd}d}d}t||d|d }tdd| |d}|| g }|jD ]}| d ur0t| ntt	 }t
|j|s=J |t|j q&tt|t|ksRJ d S )Nr   r   rF   r   r
  rf   )r.   r  r-  r  )rJ   rK   r   r   r8   r   rN   r'  r   r   
isinstancer-  appendidrz   set)	r-  r   r  rd   r9   r>   hashestripletexpected_typer'   r'   r(   !test_iterative_imputer_estimators  s   

r5  c                  C   s   t jd} d}d}t||d| d }tdddd| d}||}tt ||dk d tt 	||dk d t||dk ||dk  d S )	Nr   r   rF   r   r
  rf   皙?)r.   r  r  r   r  
rJ   rK   r   r   r8   r   rN   r   minr   r   r  rd   r9   r>   r  r'   r'   r(   test_iterative_imputer_clip  s   

r:  c                  C   s   t jd} d}d}t||d| d }d|d d df< tdddd	dd
dd| d	}||}tt ||dk d tt 	||dk d
 t||dk ||dk  d S )Nr   r   rF   r   r
  rf   rG   rV   Tr6  rK   )	r.   r  r  r  r  r   r  r  r  r7  r9  r'   r'   r(   %test_iterative_imputer_clip_truncnorm  s(   
r;  c                     s   t jd} | jdd t j d d< tddd| d  t  fdd	td
D }t	|dks7J t	|dks?J |
 | }}t|| | d\}}|dkr[|d7 }t|| | d\}}|dk sr|dkstJ dd S d S )N*   )rV   rV   )rx   r   r   T)r  r   r  r  c                    s   g | ]}  d  d  qS r   )r3   )r$  _r   r'   r(   r%    s    zEtest_iterative_imputer_truncated_normal_posterior.<locals>.<listcomp>r   normg-q=r6  r   z&The posterior does appear to be normal)rJ   rK   r   normalrM   r   rN   rk   r   r   rA   stdr   )r   imputationsmusigmaks_statisticp_valuer'   r   r(   1test_iterative_imputer_truncated_normal_posterior  s    
 rG  c                 C   s   t jd}d}d}|jdd||fd}|jdd||fd}d|d d df< d|d< tdd| |d|}td| d	|}t||d d df ||d d df  d S )
Nr   r   rF   rU   )lowhighrx   rf   r   )r.   r  rI   r  r-   )	rJ   rK   r   randintr   r2   r   r   r3   )r/   r   r  rd   X_trainX_testr>   initial_imputerr'   r'   r(   +test_iterative_imputer_missing_at_transform  s    (rN  c                  C   s   t jd} t jd}d}d}t||d| d }tddd| d}|| ||}||}t |t	
t |ks@J tddd	d d
| d}tddd	d d
|d}	|| |	| ||}
||}|	|}t|
| t|
| d S )Nr   rf   r   rF   r   r
  T)r.   r  r  r  Fr  )r.   r  r  r  r  r  )rJ   rK   r   r   r8   r   r2   r3   rA   r[   approxr   )rng1rng2r  rd   r9   r>   
X_fitted_1
X_fitted_2imputer1imputer2X_fitted_1aX_fitted_1br'   r'   r(   .test_iterative_imputer_transform_stochasticity'  sF   


	





rX  c                  C   s   t jd} | dd}t j|d d df< td| d}td| d}|||}||}t	|d d dd f | t	|| d S )Nr   r   rF   )r  r  rf   )
rJ   rK   r   randrM   r   r2   r3   rN   r   )r   r9   m1m2pred1pred2r'   r'   r(   !test_iterative_imputer_no_missingY  s   
r^  c            	      C   s   t jd} d}| |d}| d|}t ||}| ||dk }| }t j||< tdd| d}||}t	||dd d S )	Nr   2   rf   r   rV   r  r  r  g{Gz?atol)
rJ   rK   r   rY  dotr4   rM   r   rN   r   )	r   rd   ABr9   nan_mask	X_missingr>   X_filledr'   r'   r(   test_iterative_imputer_rank_oneg  s   

ri  rankrU   rV   c                 C   s   t jd}d}d}||| }|| |}t ||}|||dk }| }t j||< |d }|d | }	||d  }
||d  }tddd|d|	}|	|}t
|
|d	d
 d S )Nr   F   r   rG   rV   r  rf   )r  r  r  r  r   ra  )rJ   rK   r   rY  rc  r4   rM   r   r2   r3   r   )rj  r   r  rd   rd  re  rh  rf  rg  rK  X_test_filledrL  r>   
X_test_estr'   r'   r(   )test_iterative_imputer_transform_recoveryv  s(   

rn  c               	   C   s  t jd} d}d}| ||}| ||}t |j}t|D ])}t|D ]"}|d d || | f  |d d |f |d d |f  d 7  < q&q | ||dk }| }	t j	|	|< |d }|	d | }
||d  }|	|d  }t
dd| d|
}||}t||dd	d
 d S )Nr   r   rF   rG   g      ?rf   r`  gMbP?{Gz?)rtolrb  )rJ   rK   r   rL   r   rO   r   rY  r4   rM   r   r2   r3   r   )r   r  rd   rd  re  rh  r   r   rf  rg  rK  rl  rL  r>   rm  r'   r'   r(   &test_iterative_imputer_additive_matrix  s(   B

rq  c                  C   s   t jd} d}d}| |d}| d|}t ||}| ||dk }| }t j||< tdddd| d	}||}	t	|j
||j ksGJ t|jdd| d
}||}
t|	|
dd tdddd| d	}|| |j|jksrJ d S )Nr   r_  rV   rf   r   r   ro  F)r  tolr  r  r  )r  r  r  r  gHz>ra  )rJ   rK   r   rY  rc  r4   rM   r   rN   rz   r'  r   r   r2   r  )r   r  rd   rd  re  r9   rf  rg  r>   X_filled_100X_filled_earlyr'   r'   r(   %test_iterative_imputer_early_stopping  s0   






ru  c            
      C   s   t dd\} }| j\}}d| d d df< tjd}d}t|D ]}|jt|t|| dd}tj	| ||f< q t
d	dd
}t  tdt || |}	W d    n1 sZw   Y  tt|	riJ d S )NT)
return_X_yrf   rU   r   g333333?F)rx   replacerV   )r  r  error)r   rO   rJ   rK   r   r   choicer   intrM   r   warningscatch_warningssimplefilterRuntimeWarningrN   r   r   )
r9   r&   	n_samples
n_featuresr   missing_ratefeat
sample_idxr>   X_fillr'   r'   r(   $test_iterative_imputer_catch_warning  s    

r  z$min_value, max_value, correct_outputr   )r   rV   rF   )r      i,  r   rF   r  )scalarszNone-defaultinflistszlists-with-inf)idsc                 C   s   t jddd}t| |d}|| t|jt jr#t|j	t js%J |jj
d |j
d kr;|j	j
d |j
d ks=J t|dd d f |j t|dd d f |j	 d S )Nr   rF   rU   r  r   rf   )rJ   rK   r   rL   r   r2   r.  
_min_valuendarray
_max_valuerO   r   )r  r   correct_outputr9   r>   r'   r'   r(   )test_iterative_imputer_min_max_array_like  s   
r  zmin_value, max_value, err_msg)r   r   min_value >= max_value.r  )r   r  r   z_value' should be of shape)r   rV   rV   c                 C   sV   t jd}t| |d}tjt|d || W d    d S 1 s$w   Y  d S )NrF   rU   r  rX   )rJ   rK   r   r[   rs   rt   r2   )r  r   r0   r9   r>   r'   r'   r(   *test_iterative_imputer_catch_min_max_error  s
   "r  zmin_max_1, min_max_2irg   zNone-vs-infzScalar-vs-vectorc              	   C   s   t t jdddgdt jt jdgddt jdgt jddt jgg}t t jdt jdgddt jt jgt jdddgg}t| d | d dd	}t|d |d dd	}|||}|||}t|d d df |d d df  d S )
NrG   rf   rF   r   rU   rg   rV   r   )r  r   r  )rJ   rk   rM   r   r2   r3   r   )	min_max_1	min_max_2rK  rL  rT  rU  X_test_imputed1X_test_imputed2r'   r'   r(   4test_iterative_imputer_min_max_array_like_imputation  s&   *&r  r  TFc                 C   s   t jd}t g dg dg dg dg}t t jdddgt jdd	dgt jd	d
d	gg}td| |d}|||}| rUt|d d df t 	|d d df  d S t|d d df g ddd d S )Nr   )rV   rG   rG   rf   )rF   rf   rG   r   )rU   rf   rf   rf   )r   rg   rG   rG   rG   rg   rV   rf   rF   rA   )rI   r  r  )   r      g-C6?)rp  )
rJ   rK   r   rk   rM   r   r2   r3   r   rA   )r  r   rK  rL  r>   rm  r'   r'   r(   'test_iterative_imputer_skip_non_missing0  s   ".,"r  
rs_imputer)seedrs_estimatorc                 C   sH   G dd d}||d}t | d}td}|| |j|ks"J d S )Nc                   @   s$   e Zd Zdd Zdd Zdd ZdS )zCtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimatorc                 S   s
   || _ d S r"   r   )selfr  r'   r'   r(   __init__G  s   
zLtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.__init__c                 _   s   | S r"   r'   )r  r}   kgardsr'   r'   r(   r2   J  s   zGtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.fitc                 S   s   t |jd S )Nr   )rJ   r   rO   )r  r9   r'   r'   r(   predictM  s   zKtest_iterative_imputer_dont_set_random_state.<locals>.ZeroEstimator.predictN)__name__
__module____qualname__r  r2   r  r'   r'   r'   r(   ZeroEstimatorF  s    r  r   r  )r   rJ   r   r2   r  )r  r  r  r-  r>   rK  r'   r'   r(   ,test_iterative_imputer_dont_set_random_stateC  s   




r  zX_fit, X_trans, params, msg_errr   missing-onlyauto)featuresr   zBhave missing values in transform but have no missing values in fitra   rb   rc   re   z1MissingIndicator does not support data with dtypec                 C   s^   t dd}|jdi | tjt|d || | W d    d S 1 s(w   Y  d S )Nr   r   rX   r'   )r   
set_paramsr[   rs   rt   r2   r3   )X_fitr?   paramsmsg_err	indicatorr'   r'   r(   test_missing_indicator_errorW  s
   
"r  c                  C   sN   dt jft jt jfdt jfg} t jgt t t t t	 }dd t
|| D S )Nr   r   c                 S   s0   g | ]\}\}}|d kr|t jus|||fqS r=  )rJ   rk   )r$  arr_typer.   r#   r'   r'   r(   r%  y  s
    
z5_generate_missing_indicator_cases.<locals>.<listcomp>)rJ   int32rM   float64rk   r   r   r   r    r   r   )missing_values_dtypes	arr_typesr'   r'   r(   !_generate_missing_indicator_caseso  s$   r  zarr_type, missing_values, dtypez,param_features, n_features, features_indicesr   rf   rG   r   c                 C   s  t | | dgdd| gg}t | | dgg dg}t g dg dg}t g dg dg}	|||}|||}||}|	|}	t| |dd	}
|
|}|
|}|jd |ksaJ |jd |ksjJ t|
j| t	||d d |f  t	||	d d |f  |j
tksJ |j
tksJ t|t jsJ t|t jsJ |
jd
d |
|}|
|}|j
tksJ |j
tksJ |jdksJ |jdksJ t	| | t	| | d S )Nrf   rg   rG   rg   r  rF   )rf   rf   r   )r   r   rf   r   F)r.   r  r   Tr   csc)rJ   rk   astyper   rN   r3   rO   r   	features_r   r#   boolr.  r  r  r6   r8   )r.   r  r#   param_featuresr  features_indicesr  r?   X_fit_expectedX_trans_expectedr  
X_fit_maskX_trans_maskX_fit_mask_sparseX_trans_mask_sparser'   r'   r(   test_missing_indicator_new  s>   






r  r  c                 C   s   d}t ||dgd|dgg}t ||dgg dg}| |}| |}t|d}tjtdd || W d    n1 s?w   Y  || tjtdd || W d    d S 1 saw   Y  d S )	Nr   rf   rg   rG   r  r   z"Sparse input with missing_values=0rX   )rJ   rk   r   r[   rs   rt   rN   r3   )r  r.   r  r?   X_fit_sparseX_trans_sparser  r'   r'   r(   5test_missing_indicator_raise_on_sparse_with_missing_0  s   

"r  param_sparse)TFr  zarr_type, missing_valuesc                 C   sH  t ||dgd|dgg}t ||dgg dg}| |t j}| |t j}t||d}||}||}|du rM|jdksDJ |jdksKJ d S |dkrg|d	krgt|t j	s]J t|t j	seJ d S |d
u r}t|t j	ssJ t|t j	s{J d S t
|r|jdksJ |jdksJ d S t|t j	sJ t|t j	sJ d S )Nrf   rg   rG   r  )r.   r   Tr  r  r   F)rJ   rk   r  r  r   rN   r3   r6   r.  r  r   r7   )r  r.   r  r  r?   r  r  r  r'   r'   r(   #test_missing_indicator_sparse_param  s*   


r  c                  C   sP   t jg dg dgtd} tddd}|| }t|t g dg dg d S )	Nra   rb   rc   )rb   rc   ra   re   ra   r   )r.   r  )TFF)FFT)rJ   rk   rl   r   rN   r   )r9   r  r?   r'   r'   r(   test_missing_indicator_string  s   
 r  zX, missing_values, X_trans_exp)rb   rb   TF)rb   rb   FT)r   r   TF)r   r   FTc                 C   s0   t t|ddt|d}|| }t|| d S )NrC   r-   r   )r   r   r   rN   r   )r9   r.   X_trans_exptransr?   r'   r'   r(   #test_missing_indicator_with_imputer  s   

r  imputer_constructorz.imputer_missing_values, missing_value, err_msgNaNzInput X contains NaN)z-1r   z(types are expected to be both numerical.c                 C   sh   t jd}|dd}||d< | |d}tjt|d || W d    d S 1 s-w   Y  d S )Nr<  rF   r   r   rX   )rJ   rK   r   rL   r[   rs   rt   rN   )r  imputer_missing_valuesr   r0   r   r9   r>   r'   r'   r(   (test_inconsistent_dtype_X_missing_values#  s   
"r  c                  C   sB   t ddgddgg} tddd}|| }|jd dksJ d S )Nrf   r  r   r  r.   r   )rJ   rk   r   rN   rO   )r9   mir  r'   r'   r(   !test_missing_indicator_no_missing:  s   
r  c                 C   sH   | g dg dg dg}t ddd}||}| | ks"J d S )Nr  )rf   rG   r   )rG   r   rf   r   rf   r  )r   rN   getnnzsum)rE   r9   r  r  r'   r'   r(   /test_missing_indicator_sparse_no_explicit_zerosE  s   
r  c                 C   s8   t ddgddgg}|  }|| |jd u sJ d S )Nrf   )rJ   rk   r2   
indicator_)r  r9   r>   r'   r'   r(   test_imputer_without_indicatorQ  s   
r  c                 C   s   | t jddgdt jdgddt jgg dg}t g dg dg d	g d
g}tt jdd}||}t|s:J |j|jksBJ t|	 | d S )Nrf   rV   rG   r   rU   )rf   rG   r   )      @r         @r           r  )       @r  r   r  r   r  )g      @r  r  r  r  r   )r   r  g      "@r  r  r  T)r.   add_indicator)
rJ   rM   rk   r   rN   r   r7   rO   r   r8   )r  X_sparser:   r>   r?   r'   r'   r(   2test_simple_imputation_add_indicator_sparse_matrixZ  s   ,	
r  zstrategy, expected)rC   rb   )rD   r   c                 C   sN   ddgdt jgg}t jddgd|ggtd}t| d}||}t|| d S )Nra   rb   rc   re   rH   )rJ   rM   rk   rl   r   rN   r   )r/   expectedr9   r:   r>   r?   r'   r'   r(   "test_simple_imputation_string_listq  s
   

r  zorder, idx_orderr  )rU   rg   rG   r   rf   r  )rf   r   rG   rg   rU   c                 C   s   t jd}|dd}t j|d ddf< t j|d ddf< t j|d dd	f< t j|d d
df< tt! td| dd	|}dd |j
D }||ksNJ W d    d S 1 sYw   Y  d S )Nr<  r   rV   r_  rf      r      rG   rF   rg   )r  r  r  c                 S   r!  r'   r"  )r$  r%   r'   r'   r(   r%    r   z)test_imputation_order.<locals>.<listcomp>)rJ   rK   r   rY  rM   r[   r\   r	   r   r2   r'  )order	idx_orderr   r9   trsidxr'   r'   r(   test_imputation_order  s   "r  r   c              	   C   s2  t d| ddgg ddd| dgddd| gg}t g d	d
d| dgd| ddgddd
| gg}t d| ddg| d| | gd
| d| g| d| dgg}t g d| d
| dgg d| d| d
gg}t| ddd}||}||}||}||}	t|| t|	| ||fD ]}
||
}||}t||
 qd S )Nr   rU   r   rg   r   rV   rg   r   r   r   r   )rV   rg   rG   rf   rG   rf   rg   rV   )rf   rf   rf   rU   )rG   rU   rU   rg   rA   T)r.   r/   r  )rJ   rk   r   rN   inverse_transformr3   r   )r   X_1X_2X_3X_4r>   	X_1_transX_1_inv_trans	X_2_transX_2_inv_transr9   r?   X_inv_transr'   r'   r(   (test_simple_imputation_inverse_transform  sT   


	


	



	

	







r  c              	   C   s   t d| ddgg ddd| dgddd| gg}t| d	d
}||}tjtd|j dd || W d    d S 1 s?w   Y  d S )Nr   rU   r   r  r   r   r   r   rA   r-   zGot 'add_indicator='rX   )	rJ   rk   r   rN   r[   rs   rt   r  r  )r   r  r>   r  r'   r'   r(   3test_simple_imputation_inverse_transform_exceptions  s   


	
"r  z)expected,array,dtype,extra_value,n_repeatextra_valuer  most_frequent_value)r  r  valuer  min_valuevalue)r  r  r  r   rf   rG   rU   )rf   rf   rG   )r  r  rf   )rf   rf   r  c                 C   s"   | t tj||d||ksJ d S )Nre   )r   rJ   rk   )r  rk   r#   r  n_repeatr'   r'   r(   test_most_frequent  s   r   rI   c                 C   sp   t dt jdgdt jt jgg}t| dd}||}t|dddf d ||}t|dddf d dS )zCheck the behaviour of the iterative imputer with different initial strategy
    and keeping empty features (i.e. features containing only missing values).
    rf   rG   rU   T)rI   r   Nr   )rJ   rk   rM   r   rN   r   r3   )rI   r9   r>   rP   r'   r'   r(   *test_iterative_imputer_keep_empty_features  s    

r  c                  C   sT   t g dg dg dg dg} d}tdd|dd	d
}||  t|jj| dS )z<Check that we propagate properly the parameter `fill_value`.r   r   r   r   r   r   rD   r   T)r.   rI   r   r  r   N)rJ   rk   r   rN   r   r  r5   )r9   r   r>   r'   r'   r(   *test_iterative_imputer_constant_fill_value  s   "
r  c               	   C   s  t ddt jt jgddt jdgddt jt jgdd	t jd
gg} t j t j t j dg}t jt jt jdg}t||dd| }t j| ddd}|j|jksNJ t |t 	| t
dks_J t |t 	| t
dkspJ t ddt jt jgdddt jgddt jt jgdd	d
t jgg} t j t j dt j g}t jt jdt jg}t||dd| }| ddddf }|j|jksJ t |t 	| t
dksJ t |t 	| t
dksJ dS )zCheck that we properly apply the empty feature mask to `min_value` and
    `max_value`.

    Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/29355
    rf   rG   rg   rV   r   r   r   rF   r  r  F)r  r   r   r   g      @NrU   )rJ   rk   rM   r  r   rN   deleterO   r8  r   r[   rO  r   )r9   r  r   rP   X_without_missing_columnr'   r'   r(   1test_iterative_imputer_min_max_value_remove_empty   sP   """&r  r   c                 C   s   t dt jdgdt jt jgg}t| d}dD ]0}t|||}| r6|j|jks*J t|dddf d q|j|jd |jd d fksGJ qdS )z>Check the behaviour of `keep_empty_features` for `KNNImputer`.rf   rG   rU   )r   rN   r3   Nr   )rJ   rk   rM   r   getattrrO   r   )r   r9   r>   methodrP   r'   r'   r(   $test_knn_imputer_keep_empty_featuresW  s    
$r	  c                  C   s  t d} | d| jg dddi}t| jddd}t||tj	d	gdgd
ggt
d | d| jg dddi}tddd}t||tj	d	gd
gdggt
d | d| jg dddi}t| jddd}t||tj	dgdgdggdd ttjddd}t||tj	dgdgdggdd | d| jg dddi}t| jdd}t||tj	dgdgdgdggdd | d| jg dddi}t| jdd}t||tj	dgdgdggdd | d| jg dddi}t| jddd}t||tj	dgdgdggdd | d| jg d ddi}t| jdd}t||tj	dgd!gd!gdggdd d S )"Nr_   feature)abcNdestringre   rD   nar   r  r  )r  r  fghok)r   r/   r  )rf   NrU   Int64r   rf   rU   r  )rf   NrG   rU   rB   r-   rG   )rf   NrG   rA   r   )r   Nr  g       r   r  )r   Nr  r  r  )r[   rj   rm   Seriesr   NAr)   rN   rJ   rk   rl   r+   rM   )ro   r   r>   r'   r'   r(   test_simple_impute_pd_nag  sR   
    $  r  c                  C   sd   t d} tj}| j||d|gd|ddggg dd}t|d|}| }g d	}t|| d
S )zDCheck that missing indicator return the feature names with a prefix.r_   rf   rg   rG   rF   r`   rh   r   )missingindicator_amissingindicator_bmissingindicator_dN)	r[   rj   rJ   rM   rm   r   r2   get_feature_names_outr   )ro   r.   r9   r  rp   expected_namesr'   r'   r(   (test_missing_indicator_feature_names_out  s   


r  c                  C   s\   ddgddgddgg} t dd| }|tjtjgg}|jtks$J t|ddgg dS )zkCheck transform uses object dtype when fitted on an object dtype.

    Non-regression test for #19572.
    ra   rb   rc   rC   rH   N)r   r2   r3   rJ   rM   r#   rl   r   )r9   imp_frequentr?   r'   r'   r(    test_imputer_lists_fit_transform  s
   r  
dtype_testc                 C   sn   t jddt jgt jddgg dgt jd}t |}t jt jt jt jgg| d}||}|j| ks5J dS )zACheck transform preserves numeric dtype independent of fit dtype.r   g333333@r   )g@rG   rf   re   N)rJ   asarrayrM   r  r   r2   r3   r#   )r  r9   imprL  r?   r'   r'   r(   .test_imputer_transform_preserves_numeric_dtype  s    
r   
array_typerk   r   c           	   	   C   s   t t jdgt jdgt jdgg}t|| }d}td||d}dD ]T}|drK|sKd	}tjt|d
 t	|||}W d   n1 sEw   Y  nt	|||}|j
|j
ksZJ | dkrh|dddf  n|dddf }t|| q!dS )zCheck the behaviour of `keep_empty_features` with `strategy='constant'.
    For backward compatibility, a column full of missing values will always be
    fill and never dropped.
    rG   rU   r   rF   rD   r   r  r2   z7`strategy="constant"`, empty features are not dropped. rX   Nr   r   )rJ   rk   rM   r   r   
startswithr[   r\   FutureWarningr  rO   r8   r   )	r!  r   r9   r   r>   r  warn_msgrP   constant_featurer'   r'   r(   0test_simple_imputer_constant_keep_empty_features  s*   "
*r&  c                 C   s   t t jdgt jdgt jdgg}t||}t| |d}dD ]@}t|||}|rM|j|jks1J |dkr?|dddf  n|dddf }t|d q|j|jd |jd	 d	 fks^J qdS )
zYCheck the behaviour of `keep_empty_features` with all strategies but
    'constant'.
    rG   rU   r   r   r  r   Nr   rf   )	rJ   rk   rM   r   r   r  rO   r8   r   )r/   r!  r   r9   r>   r  rP   r%  r'   r'   r(   'test_simple_imputer_keep_empty_features  s   "
*$r'  c              
   C   s   t g dddt jgt jdt jgg dg dt jddgg}t g dg dg d	g dg dg d
g}tt jt jd}||}t|| tt jt jd}|| |}t| | d S )N)r   r   r   333333@r   r   )r   r   r   )g@r   r   皙?)r(  r   r   )r   r   r   )r   r)  r)  r-   )rJ   rk   rM   r   r8  rN   r   r8   )rr   r9   r:   r>   r?   r'   r'   r(   test_imputation_custom  s0   



r*  c                  C   sH  d} t jg dg dgt jd}td| dd}d| d	t| d
}tjtt	|d |
| W d   n1 s<w   Y  t jg dg dgt jd}|
| d|jjd
}tjtt	|d || W d   n1 svw   Y  t dddg}|t j}|D ]} td| dd}||}|j|jksJ qdS )zCheck that we raise a proper error message when we cannot cast the fill value
    to the input data type. Otherwise, check that the casting is done properly.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28309
    r   r  )rG   rU   rg   re   rD   rG   )r/   r   r.   r   r   r   rX   Nz%The dtype of the filling value (i.e. rf   )rJ   rk   int64r   r   r[   rs   rt   r   r   r2   r  r5   r#   r3   r  float32rN   )r   X_int64r>   r0   	X_float64fill_value_list	X_float32r?   r'   r'   r(   /test_simple_imputer_constant_fill_value_casting(  s2   

r1  c                 C   s^   t t jddgdt jdgddt jgg}t| ddd}t| dd	d}t|||| d
S )a  Check the behaviour of `keep_empty_features` with no empty features.

    With no-empty features, we should get the same imputation whatever the
    parameter `keep_empty_features`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/29375
    r   rf   rG   rU   rg   rV   FrI   r   r   TN)rJ   rk   rM   r   r   rN   )r/   r9   imputer_drop_empty_featuresimputer_keep_empty_featuresr'   r'   r(   (test_iterative_imputer_no_empty_featuresO  s   (
r5  rL  )rf   rG   rU   rg   )rV   r   r   r   r   r   r   c           	      C   s   t t jt jddgt jdt jdgt jddt jgg}t| ddd}||}||}t| dd	d}||}||}t||d
d
dd
f  t|d
d
df d |jd |jd ks`J |jd |jd kslJ d
S )a.  Check the behaviour of `keep_empty_features` in the presence of empty features.

    With `keep_empty_features=True`, the empty feature will be imputed with the value
    defined by the initial imputation.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/29375
    r   rf   rG   rU   rg   rV   Fr2  TN)rJ   rk   rM   r   rN   r3   r   rO   )	r/   rL  rK  r3  X_train_drop_empty_featuresX_test_drop_empty_featuresr4  X_train_keep_empty_featuresX_test_keep_empty_featuresr'   r'   r(   *test_iterative_imputer_with_empty_featuresi  s"   ,



r:  )r   r   r{  	itertoolsr   numpyrJ   r[   scipyr   scipy.statsr   sklearnr   sklearn.datasetsr   sklearn.dummyr   sklearn.exceptionsr	   sklearn.experimentalr
   sklearn.imputer   r   r   r   sklearn.impute._baser   sklearn.linear_modelr   r   r   sklearn.model_selectionr   sklearn.pipeliner   r   sklearn.random_projectionr   sklearn.utils._testingr   r   r   r   r   sklearn.utils.fixesr   r   r   r   r    r)   r+   r@   markparametrizerR   r^   rq   ru   r   r   r   r   rl   strr   r   r#   r   r   rM   r   r   r   r   r  r   r   r   r   r	  r  r  r  r  r,  r5  r:  r;  rG  rN  rX  r^  ri  rn  rq  ru  r  rk   r  r  r  r  r  rK   r   r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  rz  r   r  r  r  r	  r  r  r  r,  r  r   r&  r'  r*  r1  r5  r:  r'   r'   r'   r(   <module>   s2   	'
	

E
"





#
"
+	
%
!
2
$*"


		0


&,


	






9


7<!'&$