o
    `^hc                  
   @   sv  d dl Zd dlZd dlmZ d dlmZmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZmZmZ d dlmZmZ d dlmZmZmZmZmZ d dlmZ d d	lmZ e  Z!d
d Z"dd Z#dd Z$dd Z%ej&'dg dej&'dddgdd Z(dd Z)ej&'dedd Z*ej&j'dej+g dd d!ej+g d"d d!ej+d#gd d!fej+g d$e,d!ej+g d%e,d!ej+d&ge,d!fe+g d$e+g d%e+d&gfgg d'd(d)d* Z-d+d, Z.ej&'dd-d.gd/d0 Z/d1d2 Z0ej&j'd3ej+g dd d!ej+g d$e,d!e+g d$gg d'd(d4d5 Z1d6d7 Z2ej&'ded8d9 Z3d:d; Z4d<d= Z5d>d? Z6d@dA Z7dBdC Z8dDdE Z9dFdG Z:dHdI Z;dJdK Z<dLdM Z=dNdO Z>dPdQ Z?dRdS Z@ej&'dTej+ge e e e e dUdV ZAdWdX ZBej&'dedYdZ ZCd[d\ ZDej&'d]e
 e	 e gd^d_ ZEej&'d`e ej&'dae+g de+g dbe+g dcgddde ZFdS )f    N)issparse)config_contextdatasets)LabelBinarizerLabelEncoderMultiLabelBinarizer_inverse_binarize_multiclass_inverse_binarize_thresholdinglabel_binarize)_convert_to_numpyget_namespace)yield_namespace_device_dtype_combinations)_array_api_for_testsassert_array_equal)COO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERSDOK_CONTAINERSLIL_CONTAINERS)type_of_target)_to_object_arrayc                 C   s   t | dr	|  } | S )Ntoarray)hasattrr   a r   d/home/air/shanriGPT/back/venv/lib/python3.10/site-packages/sklearn/preprocessing/tests/test_label.pyr   $   s   
r   c                  C   s  g d} t dd}tg dgj}|| }t|jdg t|| t|||  t dd}|| }t|s;J t|jdg t||	  t||	 |  t dd}g d} tg dgj}|| }t|jd	dg t|| td
dgdd
gdd
gd
dgg}t|||  g d} tg dg dg dg dg dg}|| }t|jg d t|| t|||  d S )N)posr   r   r   Fsparse_outputr   r   r   r   r   T)negr   r   r!   r      r#   r   r!   r#   r   )spamhameggsr%   0r   r   r   r#   r   r   r#   r   r   r#   r   r   r#   r   r   r   )r'   r&   r%   r$   )
r   nparrayTfit_transformr   classes_inverse_transformr   r   )inplbexpectedgot	to_invertr   r   r   test_label_binarizer*   s:   







" 

r7   c               	   C   s   t  } tg dg dg dg}| g d}t|| tg dg dg dg dg dg dg}| g d}t|| d S )Nr#   r   r   r   r#   r   r   r   r#   )bder   r   r   )r   r;   cr<   r=   f)r   r,   r-   r/   r   	transform)r3   r4   r5   r   r   r   "test_label_binarizer_unseen_labelsS   s   
&rB   c                  C   s   t ddd} tg d}tg dgj}| |}t|| t| || t ddd} tg d}tg dg d	g d
g d	g dg}| |}t|| t| || d S )Nr   	neg_label	pos_labelr"   )rC   r   r   rC      )   rG   r#   rG   r   )rC   rC   rC   rG   )rC   rC   rG   rC   )rC   rG   rC   rC   )rG   rC   rC   rC   )r   r,   r-   r.   r/   r   r1   )r3   r2   r4   r5   r   r   r   'test_label_binarizer_set_label_encodinga   s&   


	
rI   dtype)Int64Float64booleanunique_firstTFc                 C   sX   t d}|jg d| d}|r| }t |}|ddg}t|dgdgg dS )ziChecks that LabelBinarizer works with pandas nullable dtypes.

    Non-regression test for gh-25637.
    pandas)	r#   r   r   r#   r   r#   r#   r   r#   rJ   r#   r   N)pytestimportorskipSeriesuniquer   fitrA   r   )rJ   rN   pdy_truer3   y_outr   r   r   $test_label_binarizer_pandas_nullable}   s   
rY   c                  C   s  t g d} t | }g d}d}tjt|d || W d    n1 s*w   Y  t }d}tjt|d |g  W d    n1 sKw   Y  tjt|d |g  W d    n1 sgw   Y  g d}d}tdd	d
}tjt|d || W d    n1 sw   Y  d}tddd
}tjt|d || W d    n1 sw   Y  d}td	ddd}tjt|d || W d    n1 sw   Y  g d	dgdgg ddgg}d}tjt|d t 	| W d    n	1 sw   Y  d}tjt|d t
t g dg dgdg ddd W d    n	1 s2w   Y  d}tjt|d t t d	dgdd	gg W d    n	1 s[w   Y  tjt|d tt d	dgdd	ggg dd W d    d S 1 sw   Y  d S )Nr    )rG   rH   )r   )r   rG   z@You appear to be using a legacy multi-label data representation.matchz.This LabelBinarizer instance is not fitted yet)r   r#   r   r#   z3neg_label=2 must be strictly less than pos_label=1.rG   r#   rD   z3neg_label=2 must be strictly less than pos_label=2.zqSparse binarization is only supported with non zero pos_label and zero neg_label, got pos_label=2 and neg_label=1TrE   rF   r   rH   )r   r#   rH   z?You appear to be using a legacy multi-label data representationz!output_type='binary', but y.shaper#   rG   rH   )rG   r#   rH   binaryr   youtput_typeclasses	thresholdz@Multioutput target data is not supported with label binarizationrc   )r,   r-   r   rU   rQ   raises
ValueErrorrA   r1   r/   r	   r
   )	one_classr3   multi_labelerr_msginput_labelsy_seq_of_seqsr   r   r   test_label_binarizer_errors   sj   	 $$rm   csr_containerc                 C   s   d}t jt|d t| ddgddggdddgdd W d    n1 s&w   Y  d}t jt|d t| ddgddggdg d	dd W d    d S 1 sRw   Y  d S )
Nzfoo format is not supportedr[   r#   rG   foor   r`   zAThe number of class is not equal to the number of dimension of y.r^   )rQ   rf   rg   r	   )rn   rj   r   r   r   "test_label_binarizer_sparse_errors   s$   	"rp   zvalues, classes, unknown)rG   r#   rH   r#   rH   int64rP   r^      )r;   r   r?   r   r?   r   r;   r?   r<   )rq   objectstr)idsc                 C   s   t  }||  t|j| t|| g d t|g d|  t  }|| }t|g d tjt	dd || W d    d S 1 sIw   Y  d S )N)r#   r   rG   r   rG   zunseen labelsr[   )
r   rU   r   r0   rA   r1   r/   rQ   rf   rg   )valuesrc   unknownleretr   r   r   test_label_encoder   s   

"r{   c                  C   s   t  } | g d t| jg d t| g dg d t| g dg d tt | ddg W d    d S 1 sBw   Y  d S )Nr#   r#   rr      r   )r~   r   r#   rr   r}   )r   r#   rr   rr   r}   r~   r~   )r#   rG   rH   rH   rr   r   r   r      )	r   rU   r   r0   rA   r1   rQ   rf   rg   )ry   r   r   r    test_label_encoder_negative_ints  s   "r   ru   rt   c                 C   s`   t  }|tjddg| d d}tjt|d |d W d    d S 1 s)w   Y  d S )NappleorangerP   zshould be a 1d arrayr[   )r   rU   r,   r-   rQ   rf   rg   rA   )rJ   ry   msgr   r   r    test_label_encoder_str_bad_shape  s   "r   c                  C   s>  t  } tt | g  W d    n1 sw   Y  tt | g  W d    n1 s2w   Y  t  } | g d d}tjt|d | dg W d    n1 s[w   Y  tjt|d | g d W d    n1 syw   Y  d}tjt|d | d W d    d S 1 sw   Y  d S )N)r#   rG   rH   r~   r#   z!contains previously unseen labelsr[   rC   )rC   z should be a 1d array.+shape \(\) )r   rQ   rf   rg   rA   r1   rU   )ry   r   r   r   r   test_label_encoder_errors  s(   "r   rw   c                 C   sH   t  }||  |g }ttg | |g }ttg | d S )N)r   rU   rA   r   r,   r-   r1   )rw   ry   transformedinverse_transformedr   r   r   test_label_encoder_empty_array3  s   



r   c                  C   s@  dd dd dd g} t g dg dg dg}| d  }d	D ]}}| D ]x}t|d
}|| }t||ks9J |rI|jj|jjksEJ | }t	|| t	g d|j
 |||ks_J t|d
}|| | }t||ksvJ |r|jj|jjksJ | }t	|| t	g d|j
 |||ksJ q$q d S )Nc                   S      g dS NrZ   r#   )r#   rG   r   r   r   r   r   <lambda>J      z9test_sparse_output_multilabel_binarizer.<locals>.<lambda>c                   S      ddhdhddhfS NrG   rH   r#   r   r   r   r   r   r   K      c                   S      t t dt dddhgS NrZ   r   r#   rG   iterr   r   r   r   r   L      r   r#   r#   r8   r#   r#   r   r   TFr   r^   )r,   r-   r   r/   r   indicesrJ   indptrr   r   r0   r1   rU   rA   )inputsindicator_matinverser   r2   mlbr5   r   r   r   'test_sparse_output_multilabel_binarizerG  s8   




r   c              
   C   s   t t dt dddhg}tdd}|| tt || tg dg dg d	g W d    d S 1 s;w   Y  d S )
NrZ   r   r#   rG   Fr   r   )rG   r   r   r   )	r   r   rU   rQ   rf   rg   r1   r,   r-   )rn   r2   r   r   r   r   .test_sparse_output_multilabel_binarizer_errorsl  s   

"r   c                  C   s   dd dd dd g} t g dg dg dg}| d  }| D ]D}t }|| }t|| tg d	|j |||ksAJ t }|| | }t|| tg d	|j |||ksdJ q d S )
Nc                   S   r   r   r   r   r   r   r   r   z  r   z+test_multilabel_binarizer.<locals>.<lambda>c                   S   r   r   r   r   r   r   r   r   {  r   c                   S   r   r   r   r   r   r   r   r   |  r   r   r8   r   r   r^   )	r,   r-   r   r/   r   r0   r1   rU   rA   )r   r   r   r2   r   r5   r   r   r   test_multilabel_binarizerw  s$   


r   c                  C   sF   t  } ddgdgg g}tddgddgddgg}t| || d S )Nr#   rG   r   )r   r,   r-   r   r/   )r   ra   Yr   r   r   &test_multilabel_binarizer_empty_sample  s   r   c                  C   s   t  } ddgg}tddgddgg}d}tjt|d | |ddgddgg}W d    n1 s5w   Y  tg dg dg}t g d	d
} tjt|d | |ddgddgg}W d    n1 slw   Y  t|| d S )Nr#   rG   r   zunknown class.* will be ignoredr[   rr   r8   r9   r^   re   )	r   r,   r-   rQ   warnsUserWarningrU   rA   r   )r   ra   r   warning_messagematrixr   r   r   'test_multilabel_binarizer_unknown_class  s   
r   c                  C   sF  g d} t g dg dg dg}tg dd}t|| | t|jg d tg dd}t|| | | t|jg d tg dd}t|| t dgdgdgg|f t|jg d t	| } tg dd}t|| | | d	}tg d
d}t
jt|d ||  W d    d S 1 sw   Y  d S )Nr   r   r8   r#   r   r#   r#   rH   rG   re   )rr   r#   rH   rG   r   ztThe classes argument contains duplicate classes. Remove these duplicates before passing them to MultiLabelBinarizer.)r#   rH   rG   rH   r[   )r,   r-   r   r   r/   r0   rU   rA   hstackr   rQ   rf   rg   )r2   r   r   rj   r   r   r   'test_multilabel_binarizer_given_classes  s,    "r   c                  C   s|   g d} t g dg dg dg}t g dg dg dg}tg dd}t|| | g d|_t|| | d S )	Nr   r   r8   r   r   r   re   r^   )r,   r-   r   r   r/   rc   )r2   r   indicator_mat2r   r   r   r   (test_multilabel_binarizer_multiple_calls  s   
r   c                  C   s   dgdgdgg} t g dg dg dg}t }t|| | t|||  t }t|| | | t|||  d S )Nr#   r   rG   r9   r8   r:   )r,   r-   r   r   r/   r1   rU   rA   r2   r   r   r   r   r   .test_multilabel_binarizer_same_length_sequence  s   r   c                  C   s@  t g d} g dg dfg dg dfg d| fg}tg dg dg d	g}|D ]N\}}t }tj|td
}t||| t|j| tj||td
}t|| t }t|	|
|| t|j| tj||td
}t|| q+t }tt |i i ddifg W d    d S 1 sw   Y  d S )N)r   rG   rH   ))23)1)r   r   )r   r   r   ))r;   r?   r   )r   r;   rs   ))r   r   )r   )r   r   r   r8   r   rP   r   r;   )r   r,   r-   r   rt   r   r/   r0   r1   rU   rA   rQ   rf   	TypeError)tuple_classesr   r   r2   rc   r   indicator_mat_invr   r   r   ,test_multilabel_binarizer_non_integer_labels  s,   

"r   c                  C   s0   dg} t ddgg}t }t|| | d S )Nr#   r#   r#   r   r#   )r,   r-   r   r   r/   r   r   r   r   $test_multilabel_binarizer_non_unique  s   r   c                  C   s  dg} t  }||  tt |tddgg W d    n1 s&w   Y  |tddgg |tddgg |tddgg tt |tdgg W d    n1 sfw   Y  tt |tg dg W d    d S 1 sw   Y  d S )Nr   r#   rH   r   r#   r#   r#   )r   r/   rQ   rf   rg   r1   r,   r-   )r2   r   r   r   r   ,test_multilabel_binarizer_inverse_validation
  s   
"r   c                  C   s   t ddgg dd} tg dg dg}t| | t ddgg dd} tg dg dg}t| | t g d	g d
d} tg dg dg dg dg}t| | d S )Nr#   r   )r#   rG   rr   r   re   r+   r(   )r#   r   rr   rG   r*   )r   r#   rG   rH   )rH   rG   r   r#   r)   )r
   r,   r-   r   )outr4   r   r   r   $test_label_binarize_with_class_order  s   

"r   c              	   C   s>  dD ]}|dks|dkr-|r-t t t| ||||d W d    n1 s'w   Y  qt| ||||d}tt|| t||ksEJ t| }|dkrTt||d}nt	||||| d d}tt|t|  t
|||d}	|	| }tt|| t||ksJ |	|}
tt|
t|  t|
t| ksJ qd S )	Nr   r   rc   rE   rF   r   
multiclassre   g       @)rb   rc   rd   r]   )rQ   rf   rg   r
   r   r   r   r   r   r	   r   r/   r1   )ra   rc   rF   rE   r4   r   	binarizedy_typeinversedr3   inverse_outputr   r   r   check_binarized_results,  sR   


r   c                  C   s   g d} ddg}d}d}t ddgddgddggd d df d}t| |||| g d} ddg}d}d}t ddgddgddggd d df d}t| |||| d S )Nr9   r   r#   rG   r~   )r~   r#   rH   )r,   r-   reshaper   ra   rc   rF   rE   r4   r   r   r   test_label_binarize_binary_  s   ..r   c                  C   sx   g d} g d}d}d}dt d }t| |||| tt t| |d|dd W d    d S 1 s5w   Y  d S )Nr   r#   rG   rG   r   rH   r~   Tr   )r,   eyer   rQ   rf   rg   r
   r   r   r   r   test_label_binarize_multiclassr  s   
"r   arr_typec                 C   s   t g dg dg dg}g d}d}d}|| }| |}t||||| tt t||d|dd	 W d    d S 1 s@w   Y  d S )
Nr9   r   r>   r   rG   r   r~   Tr   )r,   r-   r   rQ   rf   rg   r
   )r   y_indrc   rF   rE   r4   ra   r   r   r   test_label_binarize_multilabel  s   

"r   c                   C   s   t t tddgddgddd W d    n1 sw   Y  t jtdd tddgddgd	 W d    n1 s=w   Y  t jtd
d tddggg dd	 W d    d S 1 s`w   Y  d S )Nr   rG   r#   )rc   rF   rE   zcontinuous target data is not r[   g333333?g@re   zmismatch with the labelsrH   r^   )rQ   rf   rg   r
   r   r   r   r   !test_invalid_input_label_binarize  s   "r   c                 C   s>   t | g dg dg dgtd}t|tg d d S )Nr9   )r~   r   r~   r>   rH   r   )r   r,   aranger   r-   )rn   r5   r   r   r    test_inverse_binarize_multiclass  s    r   c                  C   s8   t  } | dddtjg | tjg}t|dg dS )z]Check that label encoder encodes nans in transform.

    Non-regression test for #22628.
    r   r;   rG   N)r   rU   r,   nanrA   r   )ry   y_transr   r   r   test_nan_label_encoder  s   r   encoderc                 C   s:   t | drJ | jg dd}| g d}t|| dS )zxCheck that label encoders do not define set_output and work with y as a kwarg.

    Non-regression test for #26854.
    
set_outputrs   )ra   N)r   r/   r   )r   y_encoded_with_kwargy_encoded_positionalr   r   r   *test_label_encoders_do_not_have_set_output  s   r   zarray_namespace, device, dtypera   r|   )rH   r}   	   r}   r   rH   c                 C   sz  t ||}|j| |d}tdd t }t }||}||}||}	|| }|| }
t|d j|jks=J t|	d j|jksIJ t|j	d j|jksVJ t
t|||
 t
t|	||  t
t|j	||j	 t }t }||}|| }
t|d j|jksJ t|j	d j|jksJ t
t|||
 t
t|j	||j	 W d    d S 1 sw   Y  d S )N)deviceT)array_api_dispatchr   )r   asarrayr   r   rU   rA   r1   r   __name__r0   r   r   r/   )ra   array_namespacer   rJ   xpxp_yxp_labelnp_labelxp_transformedxp_inv_transformednp_transformedr   r   r   'test_label_encoder_array_api_compliance  s2   







"r   )Gnumpyr,   rQ   scipy.sparser   sklearnr   r   sklearn.preprocessing._labelr   r   r   r   r	   r
   sklearn.utils._array_apir   r   r   sklearn.utils._testingr   r   sklearn.utils.fixesr   r   r   r   r   sklearn.utils.multiclassr   sklearn.utils.validationr   	load_irisirisr   r7   rB   rI   markparametrizerY   rm   rp   r-   rt   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s     ):




	%

#3
		

