o
    ?Hhtz                     @   s  d Z ddlmZmZ ddlZddlZddlZddlZddl	m
Z
 ddlmZmZmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ ddlmZ dd	lmZmZ dd
l m!Z!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3 ddl4m5Z5 ddl6m7Z7m8Z8 ddl9m:Z:m;Z; e5dZ<e Z=e<>e=j?j@ZAe=jBeA e=_Be=j?eA e=_?e ZCe<>eCj?j@ZAeCjBeA eC_BeCj?eA eC_?dd ZDejEFdee;e: ddddddddddddddddddgg d d!d" ZGd#d$ ZHejEFd%e;e: d&d' ZIG d(d) d)e
ZJd*d+ ZKd,d- ZLd.d/ ZMd0d1 ZNd2d3 ZOd4d5 ZPd6d7 ZQd8d9 ZRd:d; ZSd<d= ZTd>d? ZUd@dA ZVG dBdC dCe
ZWdDdE ZXdsdGdHZYdIdJ ZZdKdL Z[dMdN Z\dOdP Z]dQdR Z^dSdT Z_dUdV Z`dWdX ZadYdZ Zbd[d\ Zcd]d^ Zdd_d` Zedadb Zfdcdd Zgdedf ZhejEFdgeedhdidfeedhdidfee dfee0 dfgdjdk ZiejEFdleedhdmdhdneedhdmdhdngdodp ZjejEFdleedhdmdhdneedhdmdhdngdqdr ZkdS )tzE
Testing for the bagging ensemble module (sklearn.ensemble.bagging).
    )cycleproductN)BaseEstimator)load_diabetes	load_irismake_hastie_10_2)DummyClassifierDummyRegressor)AdaBoostClassifierAdaBoostRegressorBaggingClassifierBaggingRegressorHistGradientBoostingClassifierHistGradientBoostingRegressorRandomForestClassifierRandomForestRegressor)SelectKBest)LogisticRegression
Perceptron)GridSearchCVParameterGridtrain_test_split)KNeighborsClassifierKNeighborsRegressor)make_pipeline)FunctionTransformerscale)SparseRandomProjection)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)check_random_state)assert_array_almost_equalassert_array_equal)CSC_CONTAINERSCSR_CONTAINERSc            	      C   s   t d} ttjtj| d\}}}}tddgddgddgddgd	}d t td
dtddt	 t
 g}t|t|D ]\}}td|| dd|||| q:d S )Nr   random_state      ?      ?      TFmax_samplesmax_features	bootstrapbootstrap_features   max_iter   )	max_depth)	estimatorr(   n_estimators )r"   r   irisdatatargetr   r   r   r    r   r   zipr   r   fitpredict)	rngX_trainX_testy_trainy_testgrid
estimatorsparamsr7   r9   r9   c/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_bagging.pytest_classification:   s8   
	
rI   z sparse_container, params, methodr)   r5   Tr-   r*   r,   Fr/   r0   r1   r.   r0   r1   )r?   predict_probapredict_log_probadecision_functionc                    s   G dd dt }td}tttjtj|d\}}}}| |}	| |}
td|ddddd	||	|}t	|||
}td|ddddd	|||}t	|||}t
|| t|	 d
d |jD }t fdd|D ssJ d S )Nc                           e Zd ZdZ fddZ  ZS )z-test_sparse_classification.<locals>.CustomSVC7SVC variant that records the nature of the training setc                       t  || t|| _| S Nsuperr>   type
data_type_selfXy	__class__r9   rH   r>   x      
z1test_sparse_classification.<locals>.CustomSVC.fit__name__
__module____qualname____doc__r>   __classcell__r9   r9   r[   rH   	CustomSVCu       rd   r   r'   linearovr)kerneldecision_function_shaper+   r7   r(   c                 S      g | ]}|j qS r9   rV   .0ir9   r9   rH   
<listcomp>       z.test_sparse_classification.<locals>.<listcomp>c                       g | ]}| kqS r9   r9   rn   tsparse_typer9   rH   rp          r9   )r   r"   r   r   r:   r;   r<   r   r>   getattrr#   rU   estimators_all)sparse_containerrG   methodrd   r@   rA   rB   rC   rD   X_train_sparseX_test_sparsesparse_classifiersparse_resultsdense_classifierdense_resultstypesr9   ru   rH   test_sparse_classification[   s:   


r   c                  C   s   t d} ttjd d tjd d | d\}}}}tddgddgddgddgd}d t t t t	 fD ]}|D ]}t
d
|| d	|||| q9q5d S )Nr   2   r'   r)   r*   TFr-   rj   r9   )r"   r   diabetesr;   r<   r   r	   r!   r   r   r   r>   r?   )r@   rA   rB   rC   rD   rE   r7   rG   r9   r9   rH   test_regression   s0   

r   r{   c                    s"  t d}ttjd d tjd d |d\}}}}G dd dt}ddddd	d
dddd	ddddddddg}| |}| |}	|D ]K}
td| dd|
||}||	}td| dd|
|||}t	| dd |j
D }t|| t fdd|D sJ t|| qCd S )Nr   r   r'   c                       rO   )z)test_sparse_regression.<locals>.CustomSVRrP   c                    rQ   rR   rS   rW   r[   r9   rH   r>      r]   z-test_sparse_regression.<locals>.CustomSVR.fitr^   r9   r9   r[   rH   	CustomSVR   re   r   r)   r5   Tr-   r*   r,   FrJ   rK   r+   rj   c                 S   rk   r9   rl   rm   r9   r9   rH   rp      rq   z*test_sparse_regression.<locals>.<listcomp>c                    rr   r9   r9   rs   ru   r9   rH   rp      rw   r9   )r"   r   r   r;   r<   r   r   r>   r?   rU   ry   r#   rz   )r{   r@   rA   rB   rC   rD   r   parameter_setsr}   r~   rG   r   r   r   r   r9   ru   rH   test_sparse_regression   sN   




r   c                   @      e Zd Zdd Zdd ZdS )DummySizeEstimatorc                 C   s   |j d | _t|| _d S Nr   )shapetraining_size_joblibhashtraining_hash_rW   r9   r9   rH   r>      s   zDummySizeEstimator.fitc                 C   s   t |jd S r   )nponesr   rX   rY   r9   r9   rH   r?      s   zDummySizeEstimator.predictNr_   r`   ra   r>   r?   r9   r9   r9   rH   r          r   c                  C   s   t d} ttjtj| d\}}}}t ||}tt dd| d||}||||||ks3J tt dd| d||}||||||ksNJ tt	 dd||}g }|j
D ]}|j|jd ksjJ ||j q^tt|t|ks}J d S )Nr   r'   r*   F)r7   r.   r0   r(   T)r7   r0   )r"   r   r   r;   r<   r!   r>   r   scorer   ry   r   r   appendr   lenset)r@   rA   rB   rC   rD   r7   ensembletraining_hashr9   r9   rH   test_bootstrap_samples   s>   

r   c                  C   s   t d} ttjtj| d\}}}}tt dd| d||}|jD ]}tjj	d t
|j	d ks3J q!tt dd| d||}|jD ]}tjj	d t
|j	d ksVJ qDd S )Nr   r'   r*   F)r7   r/   r1   r(   r+   T)r"   r   r   r;   r<   r   r!   r>   estimators_features_r   r   unique)r@   rA   rB   rC   rD   r   featuresr9   r9   rH   test_bootstrap_features#  s2   

"
"r   c                  C   s  t d} ttjtj| d\}}}}tjddd` tt | d	||}t
tj||ddtt| t
||t|| tt | dd		||}t
tj||ddtt| t
||t|| W d    d S 1 s{w   Y  d S )
Nr   r'   ignore)divideinvalidrj   r+   )axis   )r7   r(   r.   )r"   r   r:   r;   r<   r   errstater   r    r>   r#   sumrL   r   r   exprM   r   r@   rA   rB   rC   rD   r   r9   r9   rH   test_probability?  s8   
"r   c            	   	   C   s   t d} ttjtj| d\}}}}t t fD ]H}t|ddd| d||}|	||}t
||j dk s7J d}tjt|d t|d	dd| d}||| W d    n1 sZw   Y  qd S )
Nr   r'   d   Tr7   r8   r0   	oob_scorer(   皙?{Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.matchr+   )r"   r   r:   r;   r<   r    r   r   r>   r   abs
oob_score_pytestwarnsUserWarning)	r@   rA   rB   rC   rD   r7   clf
test_scorewarn_msgr9   r9   rH   test_oob_score_classificationb  s<   
r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd| d||}|||}t	||j
 dk s0J d}tjt|d tt d	dd| d}||| W d    d S 1 sUw   Y  d S )
Nr   r'   r   Tr   r   r   r   r+   )r"   r   r   r;   r<   r   r!   r>   r   r   r   r   r   r   )	r@   rA   rB   rC   rD   r   r   r   regrr9   r9   rH   test_oob_score_regression  s6   
"r   c                  C   sf   t d} ttjtj| d\}}}}tt ddd| d||}t ||}t|	||	| d S )Nr   r'   r+   F)r7   r8   r0   r1   r(   )
r"   r   r   r;   r<   r   r   r>   r#   r?   )r@   rA   rB   rC   rD   clf1clf2r9   r9   rH   test_single_estimator  s   
r   c                  C   s2   t jt j} }t }tt|| |drJ d S )NrN   )r:   r;   r<   r    hasattrr   r>   )rY   rZ   baser9   r9   rH   
test_error  s   r   c                  C   s  t tjtjdd\} }}}tt ddd| |}||}|jdd ||}t	|| tt ddd| |}||}t	|| tt
ddddd| |}||}|jdd ||}	t	||	 tt
ddddd| |}||}
t	||
 d S )	Nr   r'      n_jobsr(   r+   r   rg   )ri   )r   r:   r;   r<   r   r    r>   rL   
set_paramsr#   r   rN   )rA   rB   rC   rD   r   y1y2y3
decisions1
decisions2
decisions3r9   r9   rH   test_parallel_classification  sF   









r   c            	      C   s   t d} ttjtj| d\}}}}tt ddd||}|jdd |	|}|jdd |	|}t
|| tt ddd||}|	|}t
|| d S )Nr   r'   r   r   r+   r   r5   )r"   r   r   r;   r<   r   r!   r>   r   r?   r#   )	r@   rA   rB   rC   rD   r   r   r   r   r9   r9   rH   test_parallel_regression  s"   




r   c                  C   sD   t jt j} }d||dk< ddd}ttt |dd| | d S )Nr+   r5   )r+   r5   )r8   estimator__Croc_auc)scoring)r:   r;   r<   r   r   r   r>   )rY   rZ   
parametersr9   r9   rH   test_gridsearch	  s   
 r   c                  C   s,  t d} ttjtj| d\}}}}td ddd||}t|jt	s$J tt	 ddd||}t|jt	s8J tt
 ddd||}t|jt
sLJ ttjtj| d\}}}}td ddd||}t|jtslJ tt ddd||}t|jtsJ tt ddd||}t|jtsJ d S )Nr   r'   r   r   )r"   r   r:   r;   r<   r   r>   
isinstance
estimator_r    r   r   r   r!   r   r   r9   r9   rH   test_estimator  s6   

r   c                  C   sL   t ttddt dd} | tjtj t| d j	d d j
ts$J d S )Nr+   )kr5   )r/   r   )r   r   r   r    r>   r:   r;   r<   r   stepsr(   int)r7   r9   r9   rH   test_bagging_with_pipelineA  s
   "r   c                   @   r   )DummyZeroEstimatorc                 C   s   t || _| S rR   )r   r   classes_rW   r9   r9   rH   r>   J  s   zDummyZeroEstimator.fitc                 C   s   | j tj|jd td S )Nr   )dtype)r   r   zerosr   r   r   r9   r9   rH   r?   N  s   zDummyZeroEstimator.predictNr   r9   r9   r9   rH   r   I  r   r   c                  C   s   t t } td}| tjtjtj t	t
 | jtjtj|jdtjjd dd W d    d S 1 s9w   Y  d S )Nr   
   )size)sample_weight)r   r   r"   r>   r:   r;   r<   r?   r   raises
ValueErrorrandintr   )r7   r@   r9   r9   rH   1test_bagging_sample_weight_unsupported_but_passedR  s   
"r   *   c                 C   s   t ddd\}}d }dD ]"}|d u rt|| dd}n|j|d ||| t||ks.J qtd| d	d}||| td
d |D tdd |D ksPJ d S )Nr2   r+   	n_samplesr(   )r   r   T)r8   r(   
warm_startr8   r   Fc                 S   rk   r9   r'   rn   treer9   r9   rH   rp   t  rq   z#test_warm_start.<locals>.<listcomp>c                 S   rk   r9   r'   r   r9   r9   rH   rp   u  rq   )r   r   r   r>   r   r   )r(   rY   rZ   clf_wsr8   	clf_no_wsr9   r9   rH   test_warm_start_  s"   r   c                  C   sp   t ddd\} }tddd}|| | |jdd tt || | W d    d S 1 s1w   Y  d S )	Nr2   r+   r   r   T)r8   r   r,   r   )r   r   r>   r   r   r   r   rY   rZ   r   r9   r9   rH   $test_warm_start_smaller_n_estimatorsy  s   "r   c            	      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| ||}|d
7 }d}tjt|d ||| W d    n1 sCw   Y  t||| d S )Nr2   r+   r   +   r'   r   TS   r8   r   r(   r*   z;Warm-start fitting without increasing n_estimators does notr   )	r   r   r   r>   r?   r   r   r   r$   )	rY   rZ   rA   rB   rC   rD   r   y_predr   r9   r9   rH   "test_warm_start_equal_n_estimators  s   
r   c            
      C   s   t ddd\} }t| |dd\}}}}tdddd	}||| |jd
d ||| ||}td
ddd	}||| ||}	t||	 d S )Nr2   r+   r   r   r'   r   TiE  r   r   r   F)r   r   r   r>   r   r?   r#   )
rY   rZ   rA   rB   rC   rD   r   r   r   r   r9   r9   rH   test_warm_start_equivalence  s   

r   c                  C   sZ   t ddd\} }tdddd}tt || | W d    d S 1 s&w   Y  d S )Nr2   r+   r   r   T)r8   r   r   )r   r   r   r   r   r>   r   r9   r9   rH   $test_warm_start_with_oob_score_fails  s
   "r   c                  C   s~   t ddd\} }tddd}|| | |jdddd	 || | tt t|d
 W d    d S 1 s8w   Y  d S )Nr   r+   r   r   T)r8   r   Fr   )r   r   r8   r   )r   r   r>   r   r   r   AttributeErrorrx   r   r9   r9   rH   $test_oob_score_removed_on_warm_start  s   "r   c                  C   sH   t ddd\} }tt ddddd}|| |j|| |jks"J d S )N   r+   r   r)   T)r.   r/   r   r(   )r   r   r   r>   r   rY   rZ   baggingr9   r9   rH   test_oob_score_consistency  s   $r  c                  C   s   t ddd\} }tt ddddd}|| | |j}|j}|j}t|t|ks+J t|d t| d ks9J |d jj	d	ksCJ d}|| }|| }|| }	| | d d |f }
|| }|	j
}|	|
| |	j
}t|| d S )
Nr   r+   r   r)   F)r.   r/   r(   r0   r   r5   ro   )r   r   r   r>   estimators_samples_r   ry   r   r   kindcoef_r#   )rY   rZ   r  estimators_samplesestimators_featuresrF   estimator_indexestimator_samplesestimator_featuresr7   rA   rC   
orig_coefs	new_coefsr9   r9   rH   test_estimators_samples  s2   r  c                  C   s   t  } | j| j}}ttddt }t|ddd}||| |jd j	d d j
 }|jd }|jd }|jd }|| d d |f }	|| }
||	|
 t|j	d d j
| d S )Nr5   )n_componentsr)   r   )r7   r.   r(   r   r+   )r   r;   r<   r   r   r   r   r>   ry   r   r  copyr  r   r$   )r:   rY   rZ   base_pipeliner   pipeline_estimator_coefr7   estimator_sampleestimator_featurerA   rC   r9   r9   rH   %test_estimators_samples_deterministic  s   


r  c                  C   sH   d} t d|  dd\}}tt | ddd}||| |j| ks"J d S )Nr   r5   r+   r   r)   )r.   r/   r(   )r   r   r   r>   _max_samples)r.   rY   rZ   r  r9   r9   rH   test_max_samples_consistency  s   r  c                  C   s   d} dgdgdggd }g dd }g dd }g dd }t d| d	||j}t d| d	||j}t d| d	||j}||g||gksIJ d S )
Nr   r   r   r+   )ABC)r   r   r+   )r   r+   r5   T)r   r(   )r   r>   r   )r(   rY   Y1Y2Y3x1x2x3r9   r9   rH   !test_set_oob_score_label_encoding  s$   


r   c                 C   s"   | j ddd} d| t|  < | S )NfloatT)r  r   )astyper   isfinite)rY   r9   r9   rH   replace7  s   r$  c               	   C   sL  t g dg ddt jdgdt jdgdt j dgg} t g dt g dg dg dg dg dgg}|D ]k}t }ttt|}|| |	|  t
|}|| |	| }|j|jksbJ t }t|}tt || | W d    n1 sw   Y  t
|}tt || | W d    n1 sw   Y  q8d S )Nr+   r   r   r5   N   r5   r'  )r5   r   r   r   r   )r5   r+   	   )r   r'     )r   arraynaninfr!   r   r   r$  r>   r?   r   r   r   r   r   )rY   y_valuesrZ   	regressorpipelinebagging_regressory_hatr9   r9   rH   *test_bagging_regressor_with_missing_inputs=  sH   


r2  c               	   C   s4  t g dg ddt jdgdt jdgdt j dgg} t g d}t }ttt|}|| |	|  t
|}|| | |	| }|j|jksLJ ||  ||  t }t|}tt || | W d    n1 ssw   Y  t
|}tt || | W d    d S 1 sw   Y  d S )Nr%  r&  r5   r'  )r   r'  r'  r'  r'  )r   r*  r+  r,  r    r   r   r$  r>   r?   r   r   rM   rL   r   r   r   )rY   rZ   
classifierr/  bagging_classifierr1  r9   r9   rH   +test_bagging_classifier_with_missing_inputsf  s6   

	


"r5  c                  C   sD   t ddgddgg} t ddg}tt ddd}|| | d S )Nr+   r5   r   r,   r   g333333?)r/   r(   )r   r*  r   r   r>   r   r9   r9   rH   test_bagging_small_max_features  s   r6  c                  C   sj   t jd} | dd}t d}G dd dt}t| ddd}||| t|j	d j
|jd  d S )Nr      r,   c                   @   s   e Zd ZdZdd ZdS )z8test_bagging_get_estimators_indices.<locals>.MyEstimatorz7An estimator which stores y indices information at fit.c                 S   s
   || _ d S rR   )_sample_indicesrW   r9   r9   rH   r>     s   
z<test_bagging_get_estimators_indices.<locals>.MyEstimator.fitN)r_   r`   ra   rb   r>   r9   r9   r9   rH   MyEstimator  s    r9  r+   )r7   r8   r(   )r   randomRandomStaterandnaranger!   r   r>   r$   ry   r8  r  )r@   rY   rZ   r9  r   r9   r9   rH   #test_bagging_get_estimators_indices  s   
r>  zbagging, expected_allow_nanr+   r3   c                 C   s   |   jj|ks
J dS )z*Check that bagging inherits allow_nan tag.N)__sklearn_tags__
input_tags	allow_nan)r  expected_allow_nanr9   r9   rH   test_bagging_allow_nan_tag  s   rC  modelr   )r7   r8   c                 C   sB   t jdd | tjtj W d   dS 1 sw   Y  dS )zAMake sure that metadata routing works with non-default estimator.T)enable_metadata_routingN)sklearnconfig_contextr>   r:   r;   r<   rD  r9   r9   rH   "test_bagging_with_metadata_routing  s   "rI  c                 C   s   |  tjtj dS )z^Make sure that we still can use an estimator that does not implement the
    metadata routing.N)r>   r:   r;   r<   rH  r9   r9   rH   -test_bagging_without_support_metadata_routing  s   rJ  )r   )lrb   	itertoolsr   r   r   numpyr   r   rF  sklearn.baser   sklearn.datasetsr   r   r   sklearn.dummyr   r	   sklearn.ensembler
   r   r   r   r   r   r   r   sklearn.feature_selectionr   sklearn.linear_modelr   r   sklearn.model_selectionr   r   r   sklearn.neighborsr   r   sklearn.pipeliner   sklearn.preprocessingr   r   sklearn.random_projectionr   sklearn.svmr   r   sklearn.treer    r!   sklearn.utilsr"   sklearn.utils._testingr#   r$   sklearn.utils.fixesr%   r&   r@   r:   permutationr<   r   permr;   r   rI   markparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r   r$  r2  r5  r6  r>  rC  rI  rJ  r9   r9   r9   rH   <module>   s    (
!


)
8	*#%$),	

() 

	



