o
    ?Hh9                    @   s  U d Z ddlZddlZddlZddlZddlZddlZddlmZm	Z	 ddl
Z
ddlZddlZddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddl3m;Z< ddl=m>Z> ddl?m@Z@mAZAmBZBmCZCmDZDmEZE ddlFmGZGmHZHmIZImJZJ ddlKmLZL dZMdZNe(e*dZOe)e+dZPeQ ZReQeSd< eRTeO eRTeP g dZUeVg dg dg dg dg d g d!g d"g d#g d$g d%g d&g d'g d(g d)g d*g d+g d,g d-g d.g d/g d0g d1g d2gZWg d3ZXg d4ZYd5d6gd6d6gd6d5gd7d7gd7d8gd8d7ggZZg d9Z[d6d6gd8d8gd:d8ggZ\g d;Z]e^ Z_ej`ad7Zbebce_jdjeZfe_jgef e__ge_jdef e__deh ZiebceijdjeZfeijgef ei_geijdef ei_dej ZkebcekjdjeZfekjgef ek_gekjdef ek_deLdZlejmdd<d=d>\ZnZoeljpd?d@ZqdAeqeqdBk< eljrddCdDd@Zse&dEd=dFddGt Zue_jge_jddHeijgeijddHekjgekjddHeZe[dHeWeXdHeWeYdHeneodHeqesdHeq esdHeuesdHevdIesdHdJZwdKdL ZxdMdN ZydOdP Zzej{|dQeP} ej{|dReNdSdT Z~dUdV ZdWdX Zej{|dYeP ej{|dReNdZd[ ZeEej{|dYeP ej{|d\d]d^ed_fd`dEed_fdad^ed_fdbd^ed<fgdcdd Zdedf Zdgdh Zdidj Zdkdl Zdmdn Zdodp Zdqdr Zdsdt Zdudv Zdwdx Zdydz ZdKd{d|Zej{|d}eRd~d Zej{|d}eUej{|deIdd Z	dKddZej{|d}eRdd Zej{|d}eUej{|deIdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zej{|d}eOdd Zej{|d}eOdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdKddZej{|deUej{|dddd Zej{|deeeUePej{|dddgdd Zej{|deUej{|dg dej{|deIdd Zej{|dee	dd eUD eNee	dd eUD eM ej{|dg dej{|deIdd Zej{|deUej{|deeIeJddń ZddǄ Zej{|d}eRddɄ Zej{|d}eRej{|ddgeI dd̄ Zej{|d}eRdd΄ Zej{|d}eUej{|deJddф Zddӄ Zej{|d}eRddՄ Zej{|d}eRej{|deJddׄ Zddل Zddۄ Zej{|ddgeI dd݄ Zej{|deeew ddh ej{|de(e*gdd Zej{|dew ej{|de)e+gdd Zdd Zdd Zdd Zej{|d}eRej{|dddgej{|ddgeI eJ dd Zej{|dRg dej{|dQeP} dd Zej{|ded:dd Zdd Zej{|dQe(e*gej{|dd8dCgdd Zdd Zdd Zdd Zdd  Z͐dd Zΐdd Zϐdd ZАdd Zѐd	d
 ZҐdd Zej{|dee/} e0} dd ZԐdd Zej{|dQeR} dd Zej{|dRd]dagdd Zej{|ded:ej{|dRd]dagdd Zej{|dRddgdd Zej{|dRddgdd Zej{|dRddgdd Zej{|dRddgd d! Zej{|ddgeJ ej{|d"e)d`d#e+d`d#gd$d% Zej{|dQeP} d&d' Zސd(d) Zej{|d*eje)dfeje+d+fee(d,fee*d-fgej{|d.dd/gd0d1 Zej{|d2eeO} d3d4gd5d6 Zej{|d7eje)feje(fgd8d9 Zd:d; Zej{|dQe)e+gej{|d<eVejd8ejdCd=d>geVejejd:dCd=d>geVd7d8d:dCejejgeVd7d8d:ejd>ejggej{|dRd]dagd?d@ ZdAdB ZdCdD ZdEdF ZdGdH ZdIdJ ZdS (L  z-
Testing for the tree module (sklearn.tree).
    N)chainproduct)NumpyPickler)assert_allclose)clonedatasetstree)DummyRegressor)NotFittedError)SimpleImputer)accuracy_scoremean_poisson_deviancemean_squared_error)cross_val_scoretrain_test_split)make_pipeline)_sparse_random_matrix)DecisionTreeClassifierDecisionTreeRegressorExtraTreeClassifierExtraTreeRegressor)CRITERIA_CLFCRITERIA_REGDENSE_SPLITTERSSPARSE_SPLITTERS)_py_sort)
NODE_DTYPE	TREE_LEAFTREE_UNDEFINED_build_pruned_tree_py_check_n_classes_check_node_ndarray_check_value_ndarray)Tree)compute_sample_weight)assert_almost_equalassert_array_almost_equalassert_array_equalcreate_memmap_backed_dataignore_warningsskip_if_32bit)	_IS_32BITCOO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERS)check_random_state)ginilog_loss)squared_errorabsolute_errorfriedman_msepoisson)r   r   )r   r   	ALL_TREES)r   r      r   r   r      ir   r   r   r   r   )r   r         r   r9   r   r   r8   皙?r   r7   r8   )r>   r   r         r   r    @r8   r   r   r?   r   r8   )r>   r>   r   g333333r   r   r   r   r   r   r=   r   r   r8   )r>   r>   r   r   r   r   r   r;   r   r   r   r   r   r8   )r>   r   r7   
   r7   r   皙	r   r7   r;   r9   r8   )zG @r         r      r   r   rD            ?r   rB   r8   )rE   r   rF   rG   r   rH   r   r   rD   rI   r   r   rA   r8   )rE      rF   rG   r   rH   r   r   rD   rI   r   r   rA   r8   )rE   rK   rF   rG   r   rH   r   r   rD   rI   rJ   r   r>   r   )   rK   r:   r8   rJ   r9   rC   r   r8   r<   r;   r   rL   r   )rL   r   r8   r8   r8   r>   r8   r   r   rA   r;   r   r8   r   )rL   r   r8   rL   r;   r>   rC   rL   r   r>   r8   rL   rL   r   )r8   r8   r   rL   rL   r>   r8   rL   r   r<   r8   rL   r;   r   )r;   r8   r   r;   r   r9   rC   r   r8   r<   r;   r   r;   r8   )rE   rK   rF   rG   r   r8   r   r   rD   rI   rJ   r   rB   r8   )rE   rK   rF   rG   r   r8   r   r   rD   rI         ?r8   r>   r>   )rE   rK   rF   rG   r   rC   r   r   rD   rI   rJ   r   r>   r>   )rL   r   r:   r8   rJ   rA   rC   r   r8   r<   r;   r8   r   r>   )rL   r   r8   r8   r8   rA   r8   r   r   rA   r   r   r   r8   )rL   r8   r8   r8   rL   r>   rC   rL   r   r>   r   rL   r8   r8   )r8   r8   r   r   r8   rB   r8   rL   r   r<   r8   rL   r8   r8   )r;   r8   r   r8   r   r9   r8   r   r8   rA   r   r   r8   r   )r8   r8   r   r   r   r   r8   r8   r8   r8   r8   r8   r   r   r   r8   r   r   r8   r   r   r   r   )      ?r@   333333?皙?rC   g333333@@g)\(?{Gz?gףp=
@rQ   g?        rO   rL   rH   r   r         @g|?5^?g(\??r   rA   r>   r8   rL   )r>   r>   r>   r8   r8   r8   r;   )r>   r8   r8      rC   )random_state	n_samples
n_features)   r:   sizerS   g?r7   )rZ   rZ   g      ?)densityrW   Xy)rZ   r;   )irisdiabetesdigitstoy	clf_small	reg_small
multilabel
sparse-pos
sparse-neg
sparse-mixzerosc                 C   s   |j | j ksJ d||j | j t| j|j|d  t| j|j|d  | jtk}t|}t| j| |j| |d  t| j	| |j	| |d  t| j
 |j
 |d  t| j
|j
|d  t| j|j|d d	 t| j| |j| |d
 d	 d S )Nz({0}: inequal number of node ({1} != {2})z: inequal children_rightz: inequal children_leftz: inequal featuresz: inequal thresholdz: inequal sum(n_node_samples)z: inequal n_node_samplesz: inequal impurityerr_msgz: inequal value)
node_countformatr'   children_rightchildren_leftr   nplogical_notfeature	thresholdn_node_samplessumr%   impurityr&   value)dsmessageexternalinternal r   \/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/sklearn/tree/tests/test_tree.pyassert_tree_equal   s>   



r   c                  C   st   t  D ]3\} }|dd}|tt t|ttd	|  |ddd}|tt t|ttd	|  qd S )Nr   rW   Failed with {0}r8   )max_featuresrW   )
	CLF_TREESitemsfitr_   r`   r'   predictTtrue_resultro   namer#   clfr   r   r   test_classification_toy   s   
r   c                  C   s   t  D ]<\} }|dd}|jtttttd t|	t
td|  |jtttttdd t|	t
td|  qd S )Nr   r   sample_weightr   rJ   )r   r   r   r_   r`   rr   oneslenr'   r   r   r   ro   fullr   r   r   r    test_weighted_classification_toy   s   
r   r#   	criterionc                 C   s   |dkrt t td }t t| }t t| }nt}t}| |dd}|t| t|	t
| | |ddd}|t| t|	t
| d S )Nr5   r8   r   rW   r   r   rW   )rr   absminr`   arrayr   r   r_   r   r   r   )r#   r   ay_trainy_testregr   r   r   r   test_regression_toy  s   r   c                  C   s   t d} d| d dd df< d| dd dd f< t | j\}}t | | gj}|  } t D ]9\}}|dd}|	||  |
|| dksSJ d||ddd}|	||  |
|| dksnJ d|q5d S )	N)rC   rC   r8   r:   r   r   rN   r   rW   r   )rr   rk   indicesshapevstackravelr   r   r   r   scorero   )r`   gridxgridyr_   r   r#   r   r   r   r   test_xor  s   

 r   c                  C   s   t t tD ]Q\\} }}||dd}|tjtj t|	tjtj}|dks2J d
| ||||ddd}|tjtj t|	tjtj}|dksXJ d
| ||qd S )Nr   r   rU   z0Failed with {0}, criterion = {1} and score = {2}rL   r   rJ   )r   r   r   CLF_CRITERIONSr   ra   datatargetr   r   ro   )r   r#   r   r   r   r   r   r   	test_iris3  s   r   z
name, Treec                 C   s\   ||dd}| tjtj ttj|tj}|tdks,J d|  d| d| d S )Nr   r   zFailed with z, criterion = z and score = )r   rb   r   r   r   r   pytestapprox)r   r#   r   r   r   r   r   r   test_diabetes_overfitE  s   r   z&criterion, max_depth, metric, max_lossr2      <   r3   r4   r5   c                 C   sT   |||ddd}| tjtj |tj|tj}d|  k r%|k s(J  J d S )NrI   r   )r   	max_depthr   rW   )r   rb   r   r   r   )r   r#   r   r   metricmax_lossr   lossr   r   r   test_diabetes_underfitR  s    r   c                  C   s   t  D ]V\} }|dddd}|tjtj |tj}tt	|dt
tjjd d| d tt|d|tjd| d t|tjt|tjdd| d qd S )Nr8   *   r   r   rW   r   r   rl   rK   )r   r   r   ra   r   r   predict_probar&   rr   rw   r   r   ro   r'   argmaxr   r%   exppredict_log_proba)r   r#   r   prob_predictr   r   r   test_probabilityg  s*   



r   c                  C   sP   t dd d t jf } t d}t D ]\}}|d dd}|| | qd S )Ni'  r   r   rW   )rr   arangenewaxis	REG_TREESr   r   r_   r`   r   r#   r   r   r   r   test_arrayrepr  s   
r   c                  C   s   ddgddgddgddgddgddgg} g d}t  D ]\}}|dd}|| | t|| |d|d	 qt D ]\}}|dd}|| | t|| |d|d	 q=d S )
NrA   r>   r8   rL   )r8   r8   r8   r8   r8   r8   r   r   r   rl   )r   r   r   r'   r   ro   r   r%   )r_   r`   r   TreeClassifierr   TreeRegressorr   r   r   r   test_pure_set  s   (

r   c               
   C   s   t g dg dg dg dg dg dg dg} t g d}t jd	d
3 t D ]%\}}|dd}|| | || |  ||  | ||  |  q,W d    d S 1 s]w   Y  d S )N)gs_c@d	a@籛 `8`@?c@)g_9a@g 8`@g-Vu]@g    @Xd@)gSW j_@r   r   r   )g ً`@4Ta@	lKa@{c@)g|@Y@g~G`a@gwI?lKa@g/"c@)g_@r   r   r   )g:^@r   r   r   )rN   gAw?gtQ?5??rS   g7G?gۺ?gb'?raise)allr   r   )rr   r   errstater   r   r   r   r   r   r   test_numerical_stability  s(   
"r   c               	   C   s   t jdddddddd\} }t D ]2\}}|dd}|| | |j}t|dk}|jd dks9J d		||dksDJ d		|qt
dd}|tjtj t
dttjd
}|tjtj t|j|j d S )N  rC   r;   r   FrX   rY   n_informativen_redundant
n_repeatedshufflerW   r   皙?r   rW   max_leaf_nodes)r   make_classificationr   r   r   feature_importances_rr   rw   r   ro   r   ra   r   r   r   r'   )r_   r`   r   r#   r   importancesn_importantclf2r   r   r   test_importances  s*   



r   c                  C   s@   t  } tt t| d W d    d S 1 sw   Y  d S )Nr   )r   r   raises
ValueErrorgetattrr   r   r   r   test_importances_raises  s   "r   c               	   C   s   t jdddddddd\} }tdddd	| |}td
ddd	| |}t|j|j t|jj	|jj	 t|jj
|jj
 t|jj|jj t|jj|jj d S )Ni  rC   r;   r   Fr   r0   r:   )r   r   rW   r2   )r   r   r   r   r   r%   r   r'   tree_rt   rq   rp   rv   )r_   r`   r   r   r   r   r   )test_importances_gini_equal_squared_error  s,   
r   c                  C   s  t  D ]\} }|dd}|tjtj |jtt	tjj
d ks%J |dd}|tjtj |jtttjj
d ksBJ |dd}|tjtj |jdksVJ |dd}|tjtj |jdksjJ |dd}|tjtj |jdks~J |dd}|tjtj |jtdtjj
d  ksJ |dd}|tjtj |jtjj
d ksJ |d d}|tjtj |jtjj
d ksJ qd S )	Nsqrt)r   r8   log2r;   rR   rJ   rN   )r6   r   r   ra   r   r   max_features_intrr   r   r   r   )r   TreeEstimatorestr   r   r   test_max_features  s4   
 
 





r   c            	   	   C   s  t  D ]o\} }| }tt |t W d    n1 s!w   Y  |tt g dg}tt	 || W d    n1 sFw   Y  | }td d }tt	 |t| W d    n1 sjw   Y  t
t}| }||t t|tt | }tt |t W d    n1 sw   Y  |tt t
t}tt	 ||d d dd f  W d    n1 sw   Y  t
tj}| }|t
t|t tt	 |t W d    n1 sw   Y  tt	 |t W d    n	1 sw   Y  | }|tt tt	 || W d    n	1 s6w   Y  tt	 || W d    n	1 sQw   Y  | }tt |t W d    n	1 sow   Y  qtdd}tjt	dd |g dgg d	 W d    n	1 sw   Y  tjt	d
d |g dgg d W d    d S 1 sw   Y  d S )N)rA   r>   r8   r>   r8   r5   r   zy is not positive.*Poissonmatchr   r8   rL   )r   r   r   zSome.*y are negative.*Poisson)r:   grL   )r   r   r   r   r
   r   r_   r   r`   r   rr   asfortranarrayr%   r   r   r   asarrayr   dotapplyr   )	r   r   r   X2y2XftXtr   r   r   r   
test_error  sr   



$r   c                  C   s   t jtjtjjd} tj}tdt	
 D ]R\}}t	| }|d|dd}|| | |jj|jjdk }t |dksAJ d||d	|dd}|| | |jj|jjdk }t |dksfJ d|qd
S )z Test min_samples_split parameterdtypeN  rC   r   )min_samples_splitr   rW   r>   	   r   r=   N)rr   r   ra   r   r   _treeDTYPEr   r   r6   keysr   r   rv   rq   r   ro   )r_   r`   r   r   r   r   node_samplesr   r   r   test_min_samples_split_  s"   r  c            	      C   s   t jtjtjjd} tj}tdt	
 D ]`\}}t	| }|d|dd}|| | |j| }t |}||dk }t |dksHJ d||d|dd}|| | |j| }t |}||dk }t |dkstJ d|qd S )	Nr   r   r:   r   )min_samples_leafr   rW   r7   r   r   )rr   r   ra   r   r   r  r  r   r   r6   r  r   r   r   bincountr   ro   )	r_   r`   r   r   r   r   outnode_counts
leaf_countr   r   r   test_min_samples_leaf~  s*   

r  c                 C   s  t | d tj}|dur||}t | d }t|jd }t|}t|  }t	dt
dddD ]F\}}	||	|dd}
|
j|||d	 |durS|
j| }n|
j|}tj||d
}||dk }t|||
j ksyJ d| |
jq3|jd }t	dt
dddD ]B\}}	||	|dd}
|
|| |dur|
j| }n|
j|}t|}||dk }t|||
j ksJ d| |
jqdS )zPTest if leaves contain at least min_weight_fraction_leaf of the
    training setr_   Nr`   r   r   rJ   rI   )min_weight_fraction_leafr   rW   r   )weightsz,Failed with {0} min_weight_fraction_leaf={1})DATASETSastyperr   float32rngrandr   rw   r6   r   linspacer   r   r   tocsrr	  r   r  ro   )r   r   sparse_containerr_   r`   r  total_weightr   r   fracr   r
  node_weightsleaf_weightsr   r   r   check_min_weight_fraction_leaf  sN   


r  r   c                 C      t | d d S Nra   r  r   r   r   r   ,test_min_weight_fraction_leaf_on_dense_input     r!  csc_containerc                 C      t | d|d d S Nrg   )r  r  r   r#  r   r   r   -test_min_weight_fraction_leaf_on_sparse_input  s   r'  c                 C   s  t | d tj}|dur||}t | d }|jd }t|  }tdtdddD ]H\}}|||ddd	}	|	|| |durJ|	j	
| }
n|	j	
|}
t|
}||dk }t|t||	j dkssJ d
| |	j|	jq+tdtdddD ]K\}}|||ddd	}	|	|| |dur|	j	
| }
n|	j	
|}
t|
}||dk }t|t||	j ||	j ksJ d
| |	j|	jq~dS )zzTest the interaction between min_weight_fraction_leaf and
    min_samples_leaf when sample_weights is not provided in fit.r_   Nr`   r   r   rJ   r;   r:   )r  r   r  rW   zBFailed with {0} min_weight_fraction_leaf={1}, min_samples_leaf={2}r   )r  r  rr   r  r   r6   r   r  r   r   r   r  r	  r   maxr  ro   r  )r   r   r  r_   r`   r  r   r   r  r   r
  r  r  r   r   r   4check_min_weight_fraction_leaf_with_min_samples_leaf  s`   







r)  c                 C   r  r  r)  r   r   r   r   Btest_min_weight_fraction_leaf_with_min_samples_leaf_on_dense_input"  r"  r+  c                 C   r$  r%  r*  r&  r   r   r   Ctest_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input'  s   
r,  c                 C   s  t jd| d\}}tdt D ]\}}t| }||dd}||ddd}||ddd}||d	dd}	|d
f|df|df|	d	ffD ]\}
}|
j|ksSJ d|
j||
|| t|
j	j
D ]g}|
j	j| tkr|
j	j| }|
j	j| }|
j	j| }|
j	j| }|
j	j| }|| }|
j	j| }|
j	j| }|
j	j| }|| }|| }|| }|
j	j| |jd  }|||  }||ksJ d||q_qAqd S )Nd   rX   rW   r   r   r   rW   rP   )r   min_impurity_decreaserW   g-C6?r   gHz>z)Failed, min_impurity_decrease = {0} > {1}z2Failed with {0} expected min_impurity_decrease={1})r   r   r   r6   r  r0  ro   r   ranger   rn   rq   r   rx   weighted_n_node_samplesrp   r   )global_random_seedr_   r`   r   r   r   est1est2est3est4r   expected_decreasenode
imp_parent
wtd_n_nodeleft
wtd_n_leftimp_leftwtd_imp_leftrightwtd_n_right	imp_rightwtd_imp_rightwtd_avg_left_right_impfractional_node_weightactual_decreaser   r   r   test_min_impurity_decrease1  sh   
rG  c               	      s   t  D ]q\} }d| v rtjtj}}ntjtj}}|dd  ||  ||}g d} fdd|D }t	 }t
|}t| jksLJ |||}	||	ks]J d| |D ]}
tt|j|
||
 d|
 d	|  d
 q_qdS )z8Test pickling preserves Tree properties and performance.
Classifierr   r   )r   rn   capacity	n_classesrq   rp   n_leavesrt   ru   rx   rv   r2  ry   c                    s   i | ]	}|t  j|qS r   )r   r   ).0	attributer   r   r   
<dictcomp>      ztest_pickle.<locals>.<dictcomp>z6Failed to generate same score  after pickling with {0}z"Failed to generate same attribute z after pickling with rl   N)r6   r   ra   r   r   rb   r   r   pickledumpsloadstype	__class__ro   r'   r   r   )r   r   r_   r`   r   
attributesfitted_attributeserialized_objectr5  score2rM  r   rN  r   test_pickley  s8   





rZ  c                  C   s  ddgddgddgddgddgddgddgddgddgddgddgddgg} ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgg}ddgddgddgddgg}t  D ]\\}}|dd}|| ||}t|| |jdksJ ||}t|dksJ |d jdksJ |d jd	ksJ ||}	t|	dksJ |	d jdksJ |	d jd	ksJ qlt	 D ]\}}
|
dd}|| ||}t
|| |jdksJ qd S )
NrA   r>   r8   rL   r   r;   r   r7   rL   )r7   r7   )r   r   r   r   r'   r   r   r   r   r   r%   )r_   r`   r   y_truer   r   r   y_hatproba	log_probar   r   r   r   r   test_multioutput  s^   





r`  c                  C   s   t  D ]\\} }|dd}|tt |jdksJ t|jddg t	tt
td fj}|dd}|t| t|jdksCJ t|jdksLJ t|jddg t|jddgddgg qd S )Nr   r   rL   r>   r8   rA   )r   r   r   r_   r`   
n_classes_r'   classes_rr   r   r   r   r   )r   r   r   _yr   r   r   test_classes_shape  s   

rd  c                  C   sf   t jd d } t jd d }td|}t D ]\}}|dd}|j| ||d t|| | qd S )N}   balancedr   r   r   )	ra   r   r   r$   r   r   r   r%   r   )unbalanced_Xunbalanced_yr   r   r   r   r   r   r   test_unbalanced_iris  s   

ri  c                  C   s  t t tjtjgD ]\\} }}|dd}tjtj|d}tj	}t
||||| tjtjd|d}tj	}t
||||| tjtjd|d}tj	}t
||||| tjtj|d}tj	}t
||||| tD ]}|tj|d}tj	}t
||||| qvtD ]}|tj|d}tj	}t
||||| qtjtjd d d |d}tj	d d d }t
||||| qd S )Nr   r   r   C)orderr   Fr;   )r   r6   r   rr   float64r  r   ra   r   r   r'   r   r   ascontiguousarrayr.   r-   )r   r   r   r   r_   r`   csr_containerr#  r   r   r   test_memory_layout  s8   
rp  c                  C   s  t dd d t jf } t d}d|d d< t d}d||dk< tdd}|j| ||d t|| t d t dd d t jf } t d}d|dd< d	|dd< d| dddf< t d}d
||d	k< tddd}|j| ||d |j	j
d dksJ d||d	k< tddd}|j| ||d |j	j
d dksJ tj} tj}td| jd d}tdd}|| | ||  t j|| jd d}tdd}|j| ||d |j	jtjjk}t|j	j
| |j	j
|  d S )Nr-  rS   2   r   r   r      r8   rL   gRQ?r   g     b@rJ   g     H@)	minlength)rr   r   r   r   r   r   r'   r   rk   r   ru   ra   r   r   r  randintr   r	  rq   r   r  r   r&   )r_   r`   r   r   
duplicatesr   r~   r   r   r   test_sample_weight5  sD   






rv  c                  C   s   t dd d t jf } t d}d|d d< tdd}t jdd}tt	 |j
| ||d W d    n1 s;w   Y  t d}td}tjt|d	 |j
| ||d W d    d S 1 sew   Y  d S )
Nr-  rS   rq  r   r   r8   r   zgInput should have at least 1 dimension i.e. satisfy `len(x.shape) > 0`, got scalar `array(0.)` instead.r   )rr   r   r   r   r   randomr  r   r   r   r   r   reescape	TypeError)r_   r`   r   r   expected_errr   r   r   test_sample_weight_invalidi  s   


"r|  c           	      C   s  t |  }|dd}|tjtj |ddd}|tjtj t|j|j ttjtjtjfj	}|ddddddddddddgdd}|tj| t|j|j |ddd}|tj| t|j|j t
tjj}|tjdk  d	9  < dd
dd}|dd}|tjtj| ||dd}|tjtj t|j|j |dd}|tjtj|d  ||dd}|tjtj| t|j|j d S )Nr   r   rf  class_weightrW   g       @rN   r   r8   r-  g      Y@rL   )r   r   ra   r   r   r%   r   rr   r   r   r   r   )	r   r   clf1r   
iris_multiclf3clf4r   r~  r   r   r   test_class_weights  s@   





r  c                 C   sz   t |  }ttttd fj}|dddgdd}d}tjt|d |	t
| W d    d S 1 s6w   Y  d S )	NrL   rJ   rN   r>   r8   r   r}  zBnumber of elements in class_weight should match number of outputs.r   )r   rr   r   r`   r   r   r   r   r   r   r_   )r   r   rc  r   rm   r   r   r   test_class_weight_errors  s   "r  c                  C   sX   t jddd\} }d}t D ]\}}|d |d d| |}| |d ks)J qd S Nr-  r8   r.  r7   )r   r   )r   make_hastie_10_2r6   r   r   get_n_leavesr_   r`   kr   r   r   r   r   r   test_max_leaf_nodes  s   r  c                  C   sP   t jddd\} }d}t D ]\}}|d|d| |}| dks%J qd S r  )r   r  r6   r   r   	get_depthr  r   r   r   test_max_leaf_nodes_max_depth  s   r  c                  C   sZ   dD ](} t t dgdggddgj| }d|jd   kr%dk s*J d J dqd S )N)rJ  ry   rq   rp   ru   rx   rt   rv   r   r8   rB   r;   z Array points to arbitrary memory)r   r   r   r   flat)attrry   r   r   r   test_arrays_persist  s   "
,r  c                  C   s\   t d} td}| ddd}t D ]\}}|dd}||| |jjdks+J qd S )Nr   )rC   rZ   rL   )rC   r   )	r/   rr   rk   rt  r6   r   r   r   r   )rW   r_   r`   r   r   r   r   r   r   test_only_constant_features  s   

r  c                  C   s~   t t g dgt df} g d}t D ]$\}}d|vr<|ddd}|| | |jjdks4J |jj	d	ks<J qd S )
N)r   r   r   r   r   r8   rL   r7   r:   rI      )r7   rH   )r   r   r   r8   r8   rL   rL   rL   r;   r;   r;   	ExtraTreer   r8   r   rL   r:   )
rr   	transposer   rk   r6   r   r   r   r   rn   r_   r`   r   r   r   r   r   r   ,test_behaviour_constant_feature_after_splits  s   r  c                  C   s   t t dgdgdgdggt dg} t g d}t D ]$\}}|ddd}|| | |jjdks8J t	|
| t dd	 q t D ]$\}}|ddd}|| | |jjdksaJ t	|| t d
d	 qId S )NrN   rS   )r7   r   )rS   rN   rS   rN   r   r8   r   r[  rJ   )r7   )rr   hstackr   rk   r   r   r   r   r   r'   r   r   r   r   r  r   r   r   (test_with_only_one_non_constant_features  s   *r  c                  C   sf   t ddt jdd} t }tjtdd |	| g d W d    d S 1 s,w   Y  d S )Ng\)c=Hr7   r>   r8   r  r   )r   r8   r   r8   )
rr   repeatr  rm  reshaper   r   r   r   r   )r_   r   r   r   r   test_big_input  s
   "r  c                  C   sB   ddl m}  tt |   W d    d S 1 sw   Y  d S )Nr   _realloc_test)sklearn.tree._utilsr  r   r   MemoryErrorr  r   r   r   test_realloc  s   "r  c                  C   s   dt d } tjdd}tjddd}d| d  }td|d}tt	 |
|| W d    n1 s8w   Y  d| d  d }td|d}tt |
|| W d    d S 1 sbw   Y  d S )	NrK   PrC   rL   r   r8   best)splitterr   )structcalcsizerr   rw  randnrt  r   r   r   	Exceptionr   r  )n_bitsr_   r`   huger   r   r   r   test_huge_allocations  s   "r  c                 C   s(  t |  }t| d }t| d }|dv r'|jd d }|d | }|d | }tt t D ]d}||}|d|d||}	|d|d||}
t|	j|
jd	|  |	
|}| tv re|	|}|	|}tt t D ]%}||tjd}t|

|| | tv rt|
|| t|
|| qkq-d S )	Nr_   r`   )rc   rb   r   r:   rW   r   5{0} with dense and sparse format gave different treesr   )r6   r  r   r,   r-   r.   r   r   r   ro   r   r   r   r   rr   r  r&   )r   datasetr   r   r_   r`   rX   r  X_sparserz   r{   y_predy_probay_log_probasparse_container_testX_sparse_testr   r   r   check_sparse_input/  s>   



r  	tree_typer  )re   rd   rc   rg   rh   ri   rj   rk   c                 C   s    |dkrdnd }t | || d S )Nrc   r;   r  )r  r  r   r   r   r   test_sparse_inputX  s   r  rb   rf   c                 C   s   t | |d d S )NrL   r  )r  r  r   r   r   test_sparse_input_reg_treesk  s   r  )rh   ri   rj   rk   c           	      C   s  t |  }t| d }||}t| d }|dddd||}|dddd||}t|j|jd|  t|||| |dddd	||}|dddd	||}t|j|jd|  t|||| |d|jd d d
||}|d|jd d d
||}t|j|jd|  t|||| |ddd||}|ddd||}t|j|jd|  t|||| d S )Nr_   r`   r   r8   rL   )rW   r   r   r  rC   )rW   r   r  )rW   r  r;   r   )	r6   r  r   r   r   ro   r&   r   r   )	r  r  r#  r   r_   r  r`   rz   r{   r   r   r   test_sparse_parameterss  sP   r  ztree_type, criterionc                 C      g | ]}|t v r|qS r   )r   rL  r   r   r   r   
<listcomp>      r  c                 C   r  r   )r   r  r   r   r   r    r  c           
      C   s   t |  }t| d }||}t| d }|dd|d||}|dd|d||}	t|j|	jd|  t|	||| d S )Nr_   r`   r   r;   rW   r   r   r  )r6   r  r   r   r   ro   r&   r   )
r  r  r#  r   r   r_   r  r`   rz   r{   r   r   r   test_sparse_criteria  s   
r  zcsc_container,csr_containerc                 C   s  t |  }d}d}|}t|}td}g }	g }
d}|g}t|D ]/}||d}||d | }|	| |jdd|fdd }|
| ||7 }|| q t|		tj
}	tj|tj
d}tjt|
tjd}
||
|	|f||fd}| }||
|	|f||fd}| }|jdd|fd}| }|jd	k dksJ |jd	k dksJ |d|d
||}|d|d
||}t|j|jdt ||f}t||D ]s\}}t|j||j| t|||| t|||j| t|j| |j|  t|| ||  t|| |j|  t|||| ttv rHt|||| qd S )Nr;   rC   r   rJ   r[   r8   r   r   rS   r  r  )r6   rr   r   r/   r1  binomialpermutationappendconcatenater  int32r   r  toarrayrt  copyr   rw   r   r   r   ro   r   r   r&   r   decision_pathr   r   r   )r  r#  ro  r   r   rY   rX   samplesrW   r   r   offsetindptrin_nonzero_i	indices_idata_ir  r_   r  X_testr`   rz   r{   XsX1r   r   r   r   test_explicit_sparse_zeros  sr   



r  c                 C   s   t |  }tjd d df  }tjd d df d}tj}tt |dd	|| W d    n1 s7w   Y  |dd}|	|| tt |
|g W d    d S 1 s^w   Y  d S )Nr   r  r   )r6   ra   r   r   r  r   r   r   r   r   r   )r   r   r_   X_2dr`   r   r   r   r   check_raise_error_on_1d_input  s   
"r  c                 C   s4   t   t|  W d    d S 1 sw   Y  d S N)r)   r  r   r   r   r   test_1d_input"  s   
"r  r  c                 C   s   t |  }tdgdgdgdgdgg}g d}g d}|d ur#||}|dd}|j|||d |jjdks8J |ddd}|j|||d |jjdksNJ d S )	Nr   r8   )r   r   r   r   r8   )r=   r=   r=   r=   r=   r   r   g?)rW   r  )r6   rr   r   r   r   r   )r   r  r   r_   r`   r   r   r   r   r    test_min_weight_leaf_split_level(  s   
r  c                 C   sD   t jtjjdd}t|   }|t t t|	t |j
	| d S NFr  X_smallr  r   r  r  r6   r   y_smallr'   r   r   )r   	X_small32r   r   r   r   test_public_apply_all_trees<  s   
r  ro  c                 C   sH   |t jtjjdd}t|   }|t t t|	t |j
	| d S r  r  )r   ro  r  r   r   r   r   test_public_apply_sparse_treesE  s   
r  c                  C   sP   t j} t j}tddd| |}|| d d  }t|g dg dg d S )Nr   r8   r  rL   )r8   r8   r   r8   r   r8   )ra   r   r   r   r   r  r  r'   )r_   r`   r   node_indicatorr   r   r   test_decision_path_hardcodedO  s
   r  c                    s   t j}t j}|jd }t|  }|ddd}||| ||}|   j||jj	fks/J |
|} fddt|D }t|tj|d |jjtk}	tt |	tj|d  jdd }
|jj|
kskJ d S )	Nr   rL   r  c                    s   g | ]
\}} ||f qS r   r   )rL  r  jr  r   r   r  g  s    z&test_decision_path.<locals>.<listcomp>r  r8   axis)ra   r   r   r   r6   r   r  r  r   rn   r   	enumerater&   rr   r   rq   r   r   rw   r(  r   )r   r_   r`   rX   r   r   node_indicator_csrleavesleave_indicator
all_leavesr   r   r  r   test_decision_pathW  s$   


r  c                 C   sX   t |t}}t|  }tt |dd|| W d    d S 1 s%w   Y  d S Nr   r   )X_multilabely_multilabelr6   r   r   rz  r   )r   ro  r_   r`   r   r   r   r   test_no_sparse_y_supportu  s
   "r  c                  C   s  t dddd} | jdgdgdgdgdggg dg d	d
 t| jjg d t| jjjg d | jdgdgdgdgdggg dt	dd
 t| jjg d t| jjjg d | jdgdgdgdgdggg dd t| jjg d t| jjjg d dS )aQ	  Check MAE criterion produces correct results on small toy dataset:

    ------------------
    | X | y | weight |
    ------------------
    | 3 | 3 |  0.1   |
    | 5 | 3 |  0.3   |
    | 8 | 4 |  1.0   |
    | 3 | 6 |  0.6   |
    | 5 | 7 |  0.3   |
    ------------------
    |sum wt:|  2.3   |
    ------------------

    Because we are dealing with sample weights, we cannot find the median by
    simply choosing/averaging the centre value(s), instead we consider the
    median where 50% of the cumulative weight is found (in a y sorted data set)
    . Therefore with regards to this test data, the cumulative weight is >= 50%
    when y = 4.  Therefore:
    Median = 4

    For all the samples, we can get the total error by summing:
    Absolute(Median - y) * weight

    I.e., total error = (Absolute(4 - 3) * 0.1)
                      + (Absolute(4 - 3) * 0.3)
                      + (Absolute(4 - 4) * 1.0)
                      + (Absolute(4 - 6) * 0.6)
                      + (Absolute(4 - 7) * 0.3)
                      = 2.5

    Impurity = Total error / total weight
             = 2.5 / 2.3
             = 1.08695652173913
             ------------------

    From this root node, the next best split is between X values of 3 and 5.
    Thus, we have left and right child nodes:

    LEFT                    RIGHT
    ------------------      ------------------
    | X | y | weight |      | X | y | weight |
    ------------------      ------------------
    | 3 | 3 |  0.1   |      | 5 | 3 |  0.3   |
    | 3 | 6 |  0.6   |      | 8 | 4 |  1.0   |
    ------------------      | 5 | 7 |  0.3   |
    |sum wt:|  0.7   |      ------------------
    ------------------      |sum wt:|  1.6   |
                            ------------------

    Impurity is found in the same way:
    Left node Median = 6
    Total error = (Absolute(6 - 3) * 0.1)
                + (Absolute(6 - 6) * 0.6)
                = 0.3

    Left Impurity = Total error / total weight
            = 0.3 / 0.7
            = 0.428571428571429
            -------------------

    Likewise for Right node:
    Right node Median = 4
    Total error = (Absolute(4 - 3) * 0.3)
                + (Absolute(4 - 4) * 1.0)
                + (Absolute(4 - 7) * 0.3)
                = 1.2

    Right Impurity = Total error / total weight
            = 1.2 / 1.6
            = 0.75
            ------
    r   r3   rL   )rW   r   r   r;   r:   rK   )rI   r  r;   r7   r;   )333333?333333?r   rN   r  )r_   r`   r   )g,d?gܶm۶m?g?)      @g      @r  )ffffff?rM   gUUUUUU?)r7   rT   r  r^   N)
r   r   r   r   rx   r'   ry   r  rr   r   )dt_maer   r   r   test_mae  s    J.&r  c                  C   s   d} t jdt jd}d}dd }tjtj|fD ][}t D ]'\}}|| |}|| }|\}	\}
}}||	ks9J | |
ks?J t|| qt	 D ](\}}|| |}|| }|\}	\}
}}||	kseJ | |
kskJ ||ksqJ qIqd S )Nr;   r   r-  c                 S   s   t t | S r  )rQ  rS  rR  )objr   r   r   _pickle_copy  s   z)test_criterion_copy.<locals>._pickle_copy)
rr   r   intpr  deepcopyr   r   
__reduce__r'   r   )	n_outputsrJ  rX   r  	copy_func_typenamecriteriaresult	typename_
n_outputs_ra  
n_samples_r   r   r   test_criterion_copy  s*   

r  c           	      C   s   t jdddd }t |d}|d d d df }| d ur&| |}|d d df }tdd||}||}t	t 
|jjtkd }||}t 
t |jj d }t|dkscJ t|dkskJ d S )Nr   r-  rH   g*Gr  r>   r   )rr   rw  RandomStater  
nan_to_numr  r   r   r   setwherer   rq   r   
differenceisfiniteru   r   )	r  r   r_   r`   r   terminal_regions	left_leaf
empty_leafinfinite_thresholdr   r   r   "test_empty_leaf_infinite_threshold  s   

r  tree_clsc                 C      t |  } | d | d }}|ddd}|||}|j}|j}tt|dks+J tt|dks7J t|||| d S Nr_   r`   rZ   r   r/  r  cost_complexity_pruning_path
ccp_alphas
impuritiesrr   r   diffassert_pruning_creates_subtreer  r  r_   r`   r   infopruning_pathr  r   r   r   'test_prune_tree_classifier_are_subtrees  s   r  c                 C   r  r  r  r  r   r   r   'test_prune_tree_regression_are_subtrees$  s   r  c                  C   sX   t dd} | dgdggddg t ddd}|dgdggddg t| j|j d S )Nr   r   r8   rC   )rW   	ccp_alpha)r   r   assert_is_subtreer   )r  r   r   r   r   test_prune_single_node_tree5  s
   
r"  c           	      C   s\   g }|D ]}| d|dd ||}|| qt||dd  D ]\}}t|j|j q d S )NrZ   r   )r   r   rW   r8   )r   r  zipr!  r   )	estimator_clsr_   r`   r  
estimatorsr   r   prev_estnext_estr   r   r   r  A  s   r  c           	      C   s  | j |j ksJ | j|jksJ | j}| j}|j}|j}dg}|r| \}}t| j| |j|  t| j| |j|  t| j	| |j	|  t| j
| |j
|  || || krdtt|j|  n!t| j| |j|  ||| || f ||| || f |s!d S d S )N)r   r   )rn   r   rq   rp   popr&   ry   r%   rx   rv   r2  r   ru   r  )	r   subtreetree_c_lefttree_c_rightsubtree_c_leftsubtree_c_rightstacktree_node_idxsubtree_node_idxr   r   r   r!  P  s@   r!  r  r  rw  c                 C   s   t d }|d jtjjdd}|d u rt|}n!||d }tj|jtjjd|_t|j|j	|j
f\|_|_	|_
ttjttjjd}t|  |d}||| t|||| t|| ||  d S )Nre   r_   Fr  r   )r  )r  r  r   r  r  r(   rr   r   r   r   r  r  r6   r   r'   r   r  todense)r   r  r  r  r  
X_readonly
y_readonlyr   r   r   r   "test_apply_path_readonly_all_treesx  s(   
r4  )r2   r4   r5   c                 C   sL   t jt j}}|| d}||| t||tt|ks$J d S )Nr   )	rb   r   r   r   rr   rw   r   r   r   )r   r#   r_   r`   r   r   r   r   test_balance_property  s   
(r5  seedc              	   C   s  ddgddgddgddgddgddgddgddgg}g d}t d| d}||| t||dks6J t d| d}||| t||dksNJ d	}tj|d d d
d||d d | d\}}d|d|k |dk @ < t|}t d| d}||| t||dksJ d S )Nr   r8   rL   r;   )r   r   r   r   r8   rL   r;   r7   r2   r   r5   rC   r  r   )effective_ranktail_strengthrX   rY   r   rW   r>   )	r   r   rr   aminr   r   r   make_regressionr   )r6  r_   r`   r   rY   r   r   r   test_poisson_zero_nodes  s,   4


	
r;  c                  C   sB  t jd} d\}}}tj|| || d}| jdd|dt j|dd }| jt || d	}t	|||| d
\}}}	}
t
dd| d}t
dd| d}|||	 |||	 tdd||	}||	df||
dffD ]6\}}}t|||}t|t ||dd }t|||}|dkr|d| k sJ |d| k sJ qhd S )Nr   )  r<  rC   rX   rY   rW   rA   rL   )lowhighr\   r   r  )lam)	test_sizerW   r5   rC   )r   r  rW   r2   mean)strategytraintestgV瞯<rJ   g      ?)rr   rw  r  r   make_low_rank_matrixuniformr(  r5   r   r   r   r   r	   r   r   clip)r  n_trainn_testrY   r_   coefr`   X_trainr  r   r   tree_poitree_msedummyval
metric_poi
metric_msemetric_dummyr   r   r   test_poisson_vs_mse  s6   

rT  rJ  c                 C   sz   d\}}t j||||ddd\}}| ddd||}| ddd||}t|j|j| d	 t|||| d
S )z3Test that criterion=entropy gives same as log_loss.)rq  r:   r   r   )rJ  rX   rY   r   r   rW   r1   +   r   entropyz> with criterion 'entropy' and 'log_loss' gave different trees.N)r   r   r   r   r   r   r   )r#   rJ  rX   rY   r_   r`   tree_log_losstree_entropyr   r   r   'test_criterion_entropy_same_as_log_loss  s"   
rY  c                     sv   t jdd\} }tddd  | |  | |}dd  fdd}t| }|| |}t||s9J d S )	Nr   r   r;   r  c                 S   s   |   | j  S r  )byteswapviewr   newbyteorderr  )arrr   r   r   reduce_ndarray  s   z8test_different_endianness_pickle.<locals>.reduce_ndarrayc                     sB   t  } t| }tj |_|jtj< |	  | 
d | S Nr   )ioBytesIOrQ  Picklercopyregdispatch_tabler  rr   ndarraydumpseekfpr   r^  r   r    get_pickle_non_native_endianness  s   


zJtest_different_endianness_pickle.<locals>.get_pickle_non_native_endianness)	r   r   r   r   r   rQ  loadrr   isclose)r_   r`   r   rl  new_clf	new_scorer   rk  r    test_different_endianness_pickle  s   
rq  c                     s~   t jdd\} }tddd| | | |}G dd dt  fdd}t| }|| |}t	||s=J d S )	Nr   r   r;   r  c                       s   e Zd Z fddZ  ZS )zPtest_different_endianness_joblib_pickle.<locals>.NonNativeEndiannessNumpyPicklerc                    s0   t |tjr| |j }t | d S r  )	
isinstancerr   re  rZ  r[  r   r\  supersave)selfr  rU  r   r   rt  (  s   zUtest_different_endianness_joblib_pickle.<locals>.NonNativeEndiannessNumpyPickler.save)__name__
__module____qualname__rt  __classcell__r   r   rv  r   NonNativeEndiannessNumpyPickler'  s    r{  c                     s(   t  }  | }| | d | S r_  )r`  ra  rf  rg  rh  r{  r   r   r   'get_joblib_pickle_non_native_endianness-  s
   

zXtest_different_endianness_joblib_pickle.<locals>.get_joblib_pickle_non_native_endianness)
r   r   r   r   r   r   joblibrm  rr   rn  )r_   r`   r   r}  ro  rp  r   r|  r   'test_different_endianness_joblib_pickle   s   r  c                 C   sn   t rtjntj}g d}dd | jj D }|D ]}|||< qtt| t|	 d}| j
|ddS )N)
left_childright_childrt   rv   c                 S      i | ]	\}\}}||qS r   r   rL  r   r   r  r   r   r   rO  @  rP  z6get_different_bitness_node_ndarray.<locals>.<dictcomp>namesformats	same_kindcasting)r+   rr   int64r  r   fieldsr   listr  valuesr  )node_ndarraynew_dtype_for_indexing_fieldsindexing_field_namesnew_dtype_dictr   	new_dtyper   r   r   "get_different_bitness_node_ndarray:  s   

r  c                 C   sj   dd | j j D }dd | j j D }dd |D }t t| t| |d}| j|ddS )	Nc                 S   r  r   r   r  r   r   r   rO  M  rP  z8get_different_alignment_node_ndarray.<locals>.<dictcomp>c                 S   s   g | ]\}}|qS r   r   )rL  r   r  r   r   r   r  P      z8get_different_alignment_node_ndarray.<locals>.<listcomp>c                 S   s   g | ]}d | qS )rK   r   )rL  r  r   r   r   r  Q  r  )r  r  offsetsr  r  )r   r  r   r  rr   r  r  r  )r  r  r  shifted_offsetsr  r   r   r   $get_different_alignment_node_ndarrayL  s   


r  c           	      C   sZ   t rtjntj}|  \}\}}}}|j|dd}| }t|d |d< ||||f|fS )Nr  r  nodes)r+   rr   r  r  r  r  r  r  )	r   r  r  rY   rJ  r  statenew_n_classes	new_stater   r   r   "reduce_tree_with_different_bitness]  s   r  c                     n   t jdd\} }tddd  | |  | |} fdd}t| }|| |}|t|ks5J d S )Nr   r   r;   r  c                     s@   t  } t| }tj |_t|jt< |	  | 
d | S r_  )r`  ra  rQ  rb  rc  rd  r  r  
CythonTreerf  rg  rh  r   r   r   "pickle_dump_with_different_bitnesso  s   



zItest_different_bitness_pickle.<locals>.pickle_dump_with_different_bitness)	r   r   r   r   r   rQ  rm  r   r   )r_   r`   r   r  ro  rp  r   r   r   test_different_bitness_pickleh  s   
r  c                     r  )Nr   r   r;   r  c                     s>   t  } t| }tj |_t|jt< |  | 	d | S r_  )
r`  ra  r   rc  rd  r  r  r  rf  rg  rh  r   r   r   "joblib_dump_with_different_bitness  s   


zPtest_different_bitness_joblib_pickle.<locals>.joblib_dump_with_different_bitness)	r   r   r   r   r   r~  rm  r   r   )r_   r`   r   r  ro  rp  r   r   r   $test_different_bitness_joblib_pickle~  s   
r  c                  C   s  t rttjnttj} ttjttjg}|dd |D 7 }tjddg| d}|D ]
}t|||  q.tj	t
dd tjddgg| d}t||  W d    n1 sZw   Y  tj	t
dd |tj}t||  W d    d S 1 s}w   Y  d S )	Nc                 S   s   g | ]}|  qS r   )r\  )rL  dtr   r   r   r    r  z(test_check_n_classes.<locals>.<listcomp>r   r8   r   zWrong dimensions.+n_classesr   zn_classes.+incompatible dtype)r+   rr   r   r  r  r   r    r  r   r   r   rm  )expected_dtypeallowed_dtypesrJ  r  wrong_dim_n_classeswrong_dtype_n_classesr   r   r   test_check_n_classes  s   "r  c               	   C   s0  t t j} d}t j|| d}| |  g}|D ]	}t|||d qtjtdd t|| dd W d    n1 s:w   Y  |d d d d d df t 	|fD ]!}tjtdd t|| |j
d W d    n1 smw   Y  qQtjtd	d t|t j| |d W d    d S 1 sw   Y  d S )
N)r:   r8   rL   r   )r  expected_shapezWrong shape.+value arrayr   )r8   rL   r8   zvalue array.+C-contiguouszvalue array.+incompatible dtype)rr   r   rm  rk   r\  r"   r   r   r   r   r   r  r  )r  r  value_ndarrayr  r  problematic_arrr   r   r   test_check_value_ndarray  s:   (
"r  c                  C   s  t } tjd| d}|t|t|g}|dd |D 7 }|D ]}t|| d qtjtdd tjd| d}t|| d W d    n1 sEw   Y  tjtd	d |d d d
 }t|| d W d    n1 siw   Y  dd |j	j
 D }| }tj|d< t	t| t| d}||}tjtdd t|| d W d    n1 sw   Y  | }tj|d< t	t| t| d}||}tjtdd t|| d W d    d S 1 sw   Y  d S )N)r:   r   c                 S   s   g | ]
}| |j qS r   )r  r   r\  )rL  r]  r   r   r   r    s    z+test_check_node_ndarray.<locals>.<listcomp>)r  zWrong dimensions.+node arrayr   )r:   rL   znode array.+C-contiguousrL   c                 S   r  r   r   r  r   r   r   rO    s    z+test_check_node_ndarray.<locals>.<dictcomp>ru   r  znode array.+incompatible dtyper  )r   rr   rk   r  r  r!   r   r   r   r   r  r   r  r  r  r  r  r  rm  )r  r  valid_node_ndarraysr]  problematic_node_ndarray
dtype_dictr  r  r   r   r   test_check_node_ndarray  sL   



"r  Splitterc           	      C   s   t jd}d}dt jddgt jd}}td ||}| ||dd|d	d
}t|}t|}|j	|ks7J t
|| s>J d	S )z&Check that splitters are serializable.r   rC   rL   r;   r   r0   r:   rJ   N)monotonic_cst)rr   rw  r  r   r  r   rQ  rR  rS  r   rr  )	r  r  r   r  rJ  r   r  splitter_serializesplitter_backr   r   r   test_splitter_serializable 	  s   

r  c                 C   sR   t | d}tdd}|tt t|| tj|dd}t	|j
|j
d dS )zhCheck that Trees can be deserialized with read only buffers.

    Non-regression test for gh-25584.
    z
clf.joblibr   r   r)	mmap_modez?The trees of the original and loaded classifiers are not equal.N)strjoinr   r   r  r  r~  rf  rm  r   r   )tmpdirpickle_pathr   
loaded_clfr   r   r   /test_tree_deserialization_from_read_only_buffer	  s   
r  c                 C   s   t ddgddgg}t ddg}| dd|| | dd}d}tjt|d ||| W d   dS 1 s;w   Y  dS )zhCheck that an error is raised when min_sample_split=1.

    non-regression test for issue gh-25481.
    r   r8   rN   )r  zb'min_samples_split' .* must be an int in the range \[2, inf\) or a float in the range \(0.0, 1.0\]r   N)rr   r   r   r   r   r   )r#   r_   r`   r   msgr   r   r   test_min_sample_split_1_error%	  s   
"r  c                 C   s   t g dgj}t g d}tdd| d}||| |t jgg}t|t |dd g |dd }|dd }tdd| d}||| |t jgg}t|t |d	d g dS )
z=Check missing values goes to correct node during predictions.	r   r8   rL   r;   rK   r  rH      r   	r   r=   r  r=   r  r  rM   g?g@r   r8   r  r<   Nr>   r9   )	rr   r   r   r   r   r   nanr   rB  )r   r_   r`   dtcr  X_equaly_equalr   r   r   ;test_missing_values_best_splitter_on_equal_nodes_no_missing;	  s   r  c                 C   s   t g dgj}t g d}t|d| d}||| |jjd }|jjd }|jj| }|jj| }||k}	|jj	| d }
|jj	| d }|
t jgg}|	rZt|
| dS t|| dS )zCheck missing values go to the correct node during predictions for ExtraTree.

    Since ETC use random splits, we use different seeds to verify that the
    left/right node is chosen correctly when the splits occur.
    r  r  r8   r  r   N)rr   r   r   r   r   r   rq   rp   r2  ry   r   r  r   )r   r6  r_   r`   etrr  r  left_samplesright_samples	went_lefty_pred_lefty_pred_rightr  r   r   r   =test_missing_values_random_splitter_on_equal_nodes_no_missingU	  s   r  rV  r0   c                 C   s   d}t t jgd g d gj}t |gd dgd  dgd  }tdd| d}||| t t jdd	ggj}||}t||ddg d
S )zITest when missing values are uniquely present in a class among 3 classes.r   r7   )r   r8   rL   r;   rK   r  rH   r  r8   rL   r   r  r;   r  Nrr   r   r  r   r   r   r   r'   )r   missing_values_classr_   r`   r  r  
y_nan_predr   r   r   /test_missing_values_best_splitter_three_classesx	  s   $
r  c                 C   s   t t jgd g d gj}t dgd dgd  }tdd| d}||| t t jd	t jggj}||}t|g d
 dS )zMissing values spanning only one class at fit-time must make missing
    values at predict-time be classified has belonging to this class.r7   r   r8   rL   r;   r7   r:   r   r8   rI   r   rL   r  r:   )r   r8   r   Nr  r   r_   r`   r  r  r  r   r   r   )test_missing_values_best_splitter_to_left	  s   
r  c                 C   s   t t jgd g d gj}t dgd dgd  dgd  }tdd| d}||| t t jdd	ggj}||}t|g d
 dS )zMissing values and non-missing values sharing one class at fit-time
    must make missing values at predict-time be classified has belonging
    to this class.r7   r  r8   r   rL   r   r  rO   g333333@r  Nr  r  r   r   r   *test_missing_values_best_splitter_to_right	  s   $
r  c                 C   s   t ddddt jddddt jg
gj}t d	gd dgd  }td
d| d}||| t t jddggj}||}t|g d dS )zNCheck behavior of missing value when there is one missing value in each class.r8   rL   r;   r:   rC   rZ   rV   r   r   r   r  gffffff@gA@r  Nr  r  r   r   r   >test_missing_values_best_splitter_missing_both_classes_has_nan	  s   &
r  r   r   c                 C   s   t ddddt jddddt jg
gj}t d	gd dgd  }| d
ur(| |}tjtdd ||| W d
   d
S 1 sAw   Y  d
S )z4Check unsupported configurations for missing values.r8   rL   r;   r:   rC   rZ   rV   r   r   NzInput X contains NaNr   )rr   r   r  r   r   r   r   r   )r  r   r_   r`   r   r   r   test_missing_value_errors	  s   &"r  c                 C   sp   t j t j}}tj|ddddf< tj|ddddf< | ddd}||| ||}|d	k s6J dS )
z5Smoke test for poisson regression and missing values.Nr:   r   rI   r>   r5   r   r   rS   )	rb   r   r  r   rr   r  r   r   r   )r#   r_   r`   r   r  r   r   r   test_missing_values_poisson	  s   
r  c                  O   s$   t j| i |\}}|dk}||fS )N   )r   make_friedman1)argskwargsr_   r`   r   r   r   make_friedman1_classification	  s   r  zmake_data, Tree, tolerancegQ?gQ?gQ?sample_weight_trainr   c                 C   s  d\}}| ||d|d\}}|  }	tj|}
tj|	|
jddg|jddgd< t|	||d	\}}}}|d
kr@t|jd }nd}d}|||d}|j	|||d |
||}tt |||d}|	|| |
||}|| |ksJ d|d| d| dS )zFCheck that trees can deal with missing values have decent performance.)r   rC   rN   )rX   rY   noiserW   FTrU   r   r\   rj  r   r   r   NrC   r   r   zscore_native_tree=z + z! should be strictly greater than )r  rr   rw  r  r  choicer   r   r   r   r   r   r   )	make_datar#   r  r3  	tolerancerX   rY   r_   r`   	X_missingr  X_missing_trainX_missing_testr   r   r   r   native_treescore_native_treetree_with_imputerscore_tree_with_imputerr   r   r   !test_missing_values_is_resilience	  s<   
 r  zTree, expected_scoreg333333?g(\?c                 C   s   t jd}d}|j|dfd}t t |d t |d g}|jddg|dd	gd
}| 	t
}||  ||< |j|d}	t j|	|< |	|dddf< | |d}
t|
||dd }||kskJ d| d| dS )z@Check the tree learns when only the missing value is predictive.r   r<  rZ   r[   rL   FTgffffff?rP   r  Nr:   r   )cvzExpected CV score: z	 but got )rr   rw  r  standard_normalr  rk   r   r  r  r  boolr  r   rB  )r#   expected_scorer3  r  rX   r_   r`   X_random_masky_maskX_predictiver   tree_cv_scorer   r   r    test_missing_value_is_predictive 
  s   "


r  zmake_data, Treec           
      C   s   t jd}d\}}| |||d\}}t j||jddg|jddgd< t |jd }d	|d
d
d< |dd}|j|||d |dd}	|	|dd
dd
d
f |dd
d  t|		||	| d
S )z=Check sample weight is correctly handled with missing values.r   )r   rC   r=  FTrU   r   r  rS   NrL   r   r   r8   )
rr   rw  r  r  r  r   r   r   r   r   )
r  r#   r  rX   rY   r_   r`   r   tree_with_swtree_samples_removedr   r   r   test_sample_weight_non_uniform>
  s   	 

(r  c                  C   sP   t ddtjtj} t ddtjtj}t| }t|}||ks&J d S r  )r   r   ra   r   r   rQ  rR  )tree1tree2pickle1pickle2r   r   r   test_deterministic_pickle[
  s
   

r  r_   r:   rI   c                 C   s   | dd}td}| |dd||}t|| dd|}|jj}t|dks1J | t	|jjdd |jjdd  t
|jjdk|jjdk@ }t	|jj| d dS )	a'  Check that we properly handle missing values in regression trees using a toy
    dataset.

    The regression targeted by this test was that we were not reinitializing the
    criterion when it comes to the number of missing values. Therefore, the value
    of the critetion (i.e. MSE) was completely wrong.

    This test check that the MSE is null when there is a single sample in the leaf.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28254
    https://github.com/scikit-learn/scikit-learn/issues/28316
    r>   r8   rI   r   r   NrL   rS   )r  rr   r   r   r   r   rx   r   r   r   flatnonzerorq   rv   )r#   r_   r   r`   r   tree_refrx   
leaves_idxr   r   r   'test_regression_tree_missing_values_toyh
  s   
"r  c                 C   s   t j| }d}t j|t jddd}t j|dd d d f< || t |}t| dd	||}|j
j}t|dksAJ |d S )	Nr-  r   r>   r8   ir:   r  r   )rr   rw  r  r   rm  r  r  r   r   r   r   rx   r   )r3  r  rX   r_   r`   r   rx   r   r   r   -test_regression_extra_tree_missing_values_toy
  s   

r  c                  C   s   t jdd\} }tjd}|  }|jtjdtjd| dddgf d d		t
}tj||< t||d
d\}}}}tjg dtjd}tdddd}	|	|| ||  t|	jjdks`J t|	jjdk|	jjdk@ }
t|	jj|
 d dS )a  Check that we properly handle missing values in clasification trees using a toy
    dataset.

    The test is more involved because we use a case where we detected a regression
    in a random forest. We therefore define the seed and bootstrap indices to detect
    one of the non-frequent regression.

    Here, we check that the impurity is null or positive in the leaves.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28254
    T)
return_X_yr   )r8   r7   )r   r   NrL   rK   )nrj     r   )prL   Q   '   a   [   &   .      e   r  Y   R   r-  r   E      r     I   J   3   /   k      K   n   rZ   r   h   9      r   r  O   #   M   Z   r  r  r  ^   r	     rK   ]   r$  r  r   r  r  r  m   r     rC   r#  r  r  \   4   rZ   r%  rK   rK      r  r  r  r  r  rU  r   rV   r  N   r  r&  i   rq  r   r  r  f   r+  r  r  r8   r  rH       r  r!  j   r"  r   8   r  r  >   U   r  r  P   r  ?   rI   r2  T   r;   r;   L   r*  r   r;   r   iHnr   r   r>   r8   rS   )r   	load_irisrr   rw  r  r  r  r   r  r  r  r  r   r   r   r   r   r   rx   r  rq   rv   r   )r_   r`   r  r  maskrL  r  r   r   r   r   r   r   r   +test_classification_tree_missing_values_toy
  s,   "


r8  c                  C   sH  t ddd} | tjtj t| j}t| j	|| j
}tj| jjtjd}d|d< t|| j| | jjdks9J |jdks@J tt t| jj|j W d   n1 sXw   Y  t| jjd |jd  t| j	|| j
}tj| jjtjd}d|dd< t|| j| | jjdksJ |jdksJ |jt| jj|j dS )zHTest pruning a tree with the Python caller of the Cythonized prune tree.r   r8   r  r   r;   N)r   r   ra   r   r   rr   
atleast_1dra  r  n_features_in_r  rk   r   rn   uint8r   r   r   AssertionErrorr'   ry   r   rJ  pruned_treeleave_in_subtreer   r   r   test_build_pruned_tree_py
  s(   r@  c                  C   s   t ddd} | tjtj t| j}t| j	|| j
}tj| jjtjd}d|d< tjtdd t|| j| W d   dS 1 sDw   Y  dS )z8Test pruning a tree does not result in an infinite loop.r   r8   r  r   z,Node has reached a leaf in the original treer   N)r   r   ra   r   r   rr   r9  ra  r  r:  r  rk   r   rn   r;  r   r   r   r   r=  r   r   r   $test_build_pruned_tree_infinite_loop
  s   "rA  c                  C   s`   t jd} | jddddt j}t |gd }t d}t||d g d}t	|| d	S )
zNon-regression test for gh-30554.

    Using log2 and log in sort correctly sorts feature_values, but the tie breaking is
    different which can results in placing samples in a different order.
    r  rS   g      $@rC   )locscaler\   r:   rq  )2r   (   rV   rZ   rC      r     1   r  -   r   r  r:      rH   r  )   r8         r  rL   r   r-  r#  r  rU  r;   !   rI   $   r  r  r  r7   r  r&  "   ,   r  r  r  %   r  rK   r
  0   r)     N)
rr   rw  default_rngnormalr  r  r  r   r   r'   )r  somefeature_valuesr  expected_samplesr   r   r   test_sort_log2_build  s   
rY  r  )__doc__r  rc  r`  rQ  rx  r  	itertoolsr   r   r~  numpyrr   r   joblib.numpy_pickler   numpy.testingr   sklearnr   r   r   sklearn.dummyr	   sklearn.exceptionsr
   sklearn.imputer   sklearn.metricsr   r   r   sklearn.model_selectionr   r   sklearn.pipeliner   sklearn.random_projectionr   sklearn.treer   r   r   r   sklearn.tree._classesr   r   r   r   sklearn.tree._partitionerr   sklearn.tree._treer   r   r   r   r    r!   r"   r#   r  sklearn.utilsr$   sklearn.utils._testingr%   r&   r'   r(   r)   r*   sklearn.utils.fixesr+   r,   r-   r.   sklearn.utils.validationr/   r   REG_CRITERIONSr   r   dictr6   __annotations__updateSPARSE_TREESr   r  r  y_small_regr_   r`   r   r   r6  ra   rw  r  r  r  r   r\   permr   load_diabetesrb   load_digitsrc   rW   make_multilabel_classificationr  r  rG  X_sparse_posrt  y_randomr  X_sparse_mixrk   r  r   r   r   markparametrizer  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r!  r'  r)  r+  r,  rG  rZ  r`  rd  ri  rp  rv  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  sortedr	  intersectionr  r  r  r  r#  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r"  r  r!  r4  r5  r1  r;  rT  rY  rq  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r:  r   r  r  r  r  r  r8  r@  rA  rY  r   r   r   r   <module>   s   $	 

(

'



	
$B!;
=
H3<-4
/
		
)3
K


d
(
!*
!4!




 -!/