o
    ?Hh~U                     @   sb  d dl Z d dlmZ d dlmZ d dlZd dlZd dlZd dlm	Z
 d dlmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZmZmZmZmZ d d
lmZmZ e d!dZ"dd Z#ej$%dddde&ddfg ddfg ddfdde'dddfe'd ddfej(g dej)ddfej(g dej*ddfej(g dej+ddfddgdfde(ddgdfdg d dfd!e'd"d#dfe(g d dfej(g d e,ddfgd$d% Z-d&d' Z.d(d) Z/eej$%d*e d+d, Z0ej$%d-g d.ej$%d/g d0d1d2 Z1ej$%d-g d3ej$%d/g d0d4d5 Z2ej$%d-g d6ej$%d/g d0ej$%d7d8dgd9d:ggd;d< Z3ej$%d=ddgej$%d>ddgej$%d-g d6ej$%d/d?d@gej$%dAd g dBg dCgfd8ddDgdEdFgdGdggfgdHdI Z4ej$%d-g d3ej$%d/g dJdKdL Z5ej$%d-g d6ej$%d/g dJej$%dMd g dBg dCgfd8ddDgdEdFgdGdggfgdNdO Z6ej$%dPg dQdRdS Z7ej$%d-g d3dTdU Z8ej$%dPg dVej$%d7dd:gdWdX Z9ej$%d-g dYdZd[ Z:d\d] Z;ej$%d^ddDgd_d` Z<ej$%dag dbdcdd Z=dedf Z>dgdh Z?ej$%d7d d d8ge'd de(d d8ggdidj Z@ej$%d-g dkdldm ZAej$%dndodpe, dqfgdrds ZBej$%dtdugdvgdudvgdudwgdvdwggdxdy ZCdzd{ ZDd|d} ZEd~d ZFdd ZGdd ZHej$%dedd ZIdd ZJej$%dedd ZKdS )    N)copy)SkipTest)parse)_safe_indexingresampleshuffle))yield_namespace_device_dtype_combinations)_determine_key_type_get_column_indices_safe_assign)MockDataFrame)_array_api_for_tests_convert_containerassert_allclose_dense_sparseassert_array_equal'skip_if_array_api_compat_not_configured)CSC_CONTAINERSCSR_CONTAINERS	   )   r   c                  C   s\  t jddd} | jg dg dg dddd	}d
dlm} dgddgg ddgdgg}|D ]}t||dd}||| | q.g dddgfg ddgfg}|D ]\}}t||dd}||dd|f | qPd
dgddgfdgdgfg}	|	D ]\}
}t||
dd}||dd|f | qwd
dgddgddgg}|D ]}t||d
d}||| | qdS )z,Check _safe_indexing for polars as expected.polarsz0.18.2
minversion)      r      )r            )r   r   r   
   abcrow)orientr   )assert_frame_equalr"   r!   )r"   r!   r#   r#   r   axisTFT)FFTNr   r   )pytestimportorskip	DataFramepolars.testingr&   r   )pldfr&   str_keyskeyout	bool_keysbool_keystr_keyint_keysint_keyaxis_0_keys r9   a/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/sklearn/utils/tests/test_indexing.pytest_polars_indexing   s,   r;   z
key, dtype)r   int)0str)TboolTr?   r   r   r   r<   r=   12r>   )r@   r<   )rA   r>   r   dtypeF))TFr?   )col_0r>   rF   col_1col_2)rG   r>   beginendc                 C   s   t | |ksJ d S N)r	   )r1   rE   r9   r9   r:   test_determine_key_type@   s   rM   c                   C   s<   t jtdd td W d    d S 1 sw   Y  d S )NNo valid specification of thematch      ?)r*   raises
ValueErrorr	   r9   r9   r9   r:   test_determine_key_type_error_   s   
"rT   c                   C   sH   t jtdd ttddddd W d    d S 1 sw   Y  d S )NzOnly array-like or scalar arerO   r   r   r   F)accept_slice)r*   rR   	TypeErrorr	   slicer9   r9   r9   r:   #test_determine_key_type_slice_errord   s   "rX   z#array_namespace, device, dtype_namec              	   C   s   t | |}tjddk |g d}t|dksJ |g d}t|dks*J z	|g d}W n ty>   d }Y nw |d urftjtdd	 t| W d    n1 sYw   Y  W d    d S W d    d S W d    d S 1 syw   Y  d S )
NT)array_api_dispatchr   r   r   r<   r)   r?   )y      ?      ?y       @       @y      @      @rN   rO   )	r   sklearnconfig_contextasarrayr	   rV   r*   rR   rS   )array_namespacedevice
dtype_namexpint_array_keybool_array_keycomplex_array_keyr9   r9   r:   !test_determine_key_type_array_apii   s(   

"re   
array_type)listarraysparse	dataframer   indices_type)rg   tuplerh   seriesrW   c                 C   s   ddg}|dkrt |d tr|d  d7  < tg dg dg dg| }t||}t||dd}t|tg dg dg|  d S )	Nr   r   rW   rZ   r   r   r      r   r   r   r'   
isinstancer<   r   r   r   rf   rk   indicesrh   subsetr9   r9   r:   &test_safe_indexing_2d_container_axis_0   s   
rv   )rg   rh   rm   polars_seriesc                 C   sl   ddg}|dkrt |d tr|d  d7  < tg d| }t||}t||dd}t|tddg|  d S )Nr   r   rW   	r   r   r   r   r   r   rp   r   r   r   r'   r   rq   rs   r9   r9   r:   test_safe_indexing_1d_container   s   
ry   )rh   ri   rj   r   rt   r   rH   rI   c                 C   s   t |}|dkrt|d tr|d  d7  < g d}tg dg dg dg| |}t||}t|d tr]| dvr]d	}tjt|d
 t||dd W d    d S 1 sVw   Y  d S t||dd}t	|tddgddgddgg|  d S )NrW   r   rG   rZ   rn   ro   r   rj   r   ESpecifying the columns using strings is only supported for dataframesrO   r'   r   r   r   r   r   r   )
r   rr   r<   r   r>   r*   rR   rS   r   r   )rf   rk   rt   indices_convertedcolumns_namerh   err_msgru   r9   r9   r:   &test_safe_indexing_2d_container_axis_1   s$   
"r   array_read_onlyindices_read_onlyrh   rm   zaxis, expected_arrayrn   ro   r   r   r   r   c           	      C   s   t g dg dg dg}| r|jdd t||}t ddg}|r*|jdd t||}t|||d}t|t|| d S )	NrZ   rn   ro   F)writer   r   r'   )nprh   setflagsr   r   r   )	r   r   rf   rk   r(   expected_arrayrh   rt   ru   r9   r9   r:   &test_safe_indexing_2d_read_only_axis_1   s   


r   )rg   rl   rh   rm   c                 C   sX   dgdgd  dgd  }t g d| }t ||}t||dd}t|t ddg|  d S )	NFTr   r   rx   r   r'   r   r   r   r   rs   r9   r9   r:   $test_safe_indexing_1d_container_mask   s
   
r   zaxis, expected_subsetc                 C   sZ   g d}t g dg dg dg| |}g d}t ||}t|||d}t|t ||  d S )NrG   rZ   rn   ro   )FTTr'   r   )rf   rk   r(   expected_subsetr}   rh   rt   ru   r9   r9   r:   test_safe_indexing_2d_mask   s   

r   z array_type, expected_output_type))rg   rg   rh   rh   ri   ri   rj   rm   r   rw   c                 C   sJ   t g dg dg dg| }d}t||dd}t g d|}t|| d S )NrZ   rn   ro   r   r   r'   r   )rf   expected_output_typerh   rt   ru   r   r9   r9   r:   #test_safe_indexing_2d_scalar_axis_0   s
   r   c                 C   s0   t g d| }d}t||dd}|dksJ d S )Nrx   r   r   r'   r   )r   r   )rf   rh   rt   ru   r9   r9   r:   test_safe_indexing_1d_scalar  s   r   )r   r   r   r   c           	      C   s   g d}t g dg dg dg| |}t|tr?| dvr?d}tjt|d t||dd	 W d    d S 1 s8w   Y  d S t||dd	}g d
}|dkrVdgdgdgg}t ||}t|| d S )NrG   rZ   rn   ro   rz   r{   rO   r   r'   )r   r   r   ri   r   r   r   )r   rr   r>   r*   rR   rS   r   r   )	rf   r   rt   r}   rh   r~   ru   expected_outputr   r9   r9   r:   #test_safe_indexing_2d_scalar_axis_1	  s    "
r   )rg   rh   ri   c                 C   s8   t g dg dg dg| }t|d dd}t|| d S )NrZ   rn   ro   r   r'   r   )rf   XX_subsetr9   r9   r:   test_safe_indexing_None_axis_0)  s   r   c                  C   s\   t d} d}| t}t jt|d t|dgdd W d    d S 1 s'w   Y  d S )Npandasz&No valid specification of the columns.rO   rQ   r   r'   )r*   r+   r,   X_toyrR   rS   r   )pdr~   r   r9   r9   r:   0test_safe_indexing_pandas_no_matching_cols_error0  s   

"r   r(   c                 C   sF   t jtdd ttddg| d W d    d S 1 sw   Y  d S )Nz'axis' should be either 0rO   r   r   r'   )r*   rR   rS   r   r   r'   r9   r9   r:   test_safe_indexing_error_axis8  s   "r   X_constructor)rh   rm   rw   c                 C   s   t td}| dkrt|} n| dkrtd}||} n| dkr.td}|j|d} d}tjt|d	 t	| d
dgdd W d    d S 1 sLw   Y  d S )Nr   rh   rm   r   rw   r   )valuesz='X' should be a 2D NumPy array, 2D sparse matrix or dataframerO   r   r   r'   )
rg   ranger   r]   r*   r+   SeriesrR   rS   r   )r   r   r   r.   r~   r9   r9   r:   !test_safe_indexing_1d_array_error>  s   

"r   c                  C   sd   ddg} g dg dg dg}d}t jt|d t|| dd	 W d    d S 1 s+w   Y  d S )
NrH   rI   rZ   rn   ro   z.String indexing is not supported with 'axis=0'rO   r   r'   r*   rR   rS   r   )rt   rh   r~   r9   r9   r:   4test_safe_indexing_container_axis_0_unsupported_typeQ  s   "r   c                  C   s   t d} t| j}t|j}|tdkrtd| g dg dd}t|ddgdd	}t| j	d
r9| j	j
}n| jjj
}t  td| d|jd< W d    n1 sXw   Y  |jd dksfJ d S )Nr   3z;SettingWithCopyWarning has been removed in pandas 3.0.0.devrZ   )r   r   r   )r!   r"   r   r   r'   SettingWithCopyWarningerrorr   )r   r   )r*   r+   parse_version__version__base_versionr   r,   r   hasattrerrorsr   corecommonwarningscatch_warningssimplefilteriloc)r   
pd_versionpd_base_versionr   ru   r   r9   r9   r:   4test_safe_indexing_pandas_no_settingwithcopy_warningY  s   





r   c                 C   s\   ddgddgddgg}d}t jt|d t|| dd	 W d
   d
S 1 s'w   Y  d
S )z@Check that we raise a ValueError when axis=1 with input as list.r   r   r   r   rp   r   z!axis=1 is not supported for listsrO   r'   Nr   )rt   r   r~   r9   r9   r:   *test_safe_indexing_list_axis_1_unsupportedr  s
   "r   )rh   ri   rj   c                 C   s   t jd}|dd}ddg}|t||jd }t|| }t|||d t||dd}t	|t||  ddg}||jd t|}t|| }t|||d t||dd}t	|t||  d	\}}|j|j }t|| }t|||d t	|t||  d
S )z,Check that `_safe_assign` works as expected.r   r   r   r   r   )row_indexerr'   )column_indexer)NNN)
r   randomRandomStaterandnlenshaper   r   r   r   )rf   rngX_arrayr   r   r   assigned_portionr   r9   r9   r:   test_safe_assign{  s.   




r   zkey, err_msg)r   z all features must be in \[0, 2\])whatever/A given column is not a column of the dataframez%No valid specification of the columnsc                 C   sZ   t d}|jtg dd}t jt|d t||  W d    d S 1 s&w   Y  d S )Nr   rG   columnsrO   )r*   r+   r,   r   rR   rS   r
   )r1   r~   r   X_dfr9   r9   r:   test_get_column_indices_error  s
   
	"r   r1   col1col2col3c                 C   s   t d}tjdtd}g d}|j||d}d| }t t}t	||  W d    n1 s1w   Y  t
|j|ks?J d S )Nr   )r   r   rD   )r   r   r   r   r   r   z1Selected columns, {}, are not unique in dataframe)r*   r+   r   zerosr<   r,   formatrR   rS   r
   r>   value)r1   r   toyr   r   r~   exc_infor9   r9   r:   6test_get_column_indices_pandas_nonunique_columns_error  s   

r   c               
   C   sl  t jddd} | jg dg dgg dd}G dd	 d	}||}td
dd
dgftdddd
gftd
dd
gfddgd
dgftdddd
gftddg dftdddgfddgddgfg g fg	}|D ]\}}t|||ksqJ qdd}t jt|d t|dg W d   n1 sw   Y  d}t jt|d t|tddd W d   dS 1 sw   Y  dS )z=Check _get_column_indices for edge cases with the interchanger   z1.5r   rZ   rn   r    r   c                   @   s   e Zd Zdd Zdd ZdS )z:test_get_column_indices_interchange.<locals>.MockDataFramec                 S   s
   || _ d S rL   )_df)selfr/   r9   r9   r:   __init__  s   
zCtest_get_column_indices_interchange.<locals>.MockDataFrame.__init__c                 S   s   t | j|S rL   )getattrr   )r   namer9   r9   r:   __getattr__  s   zFtest_get_column_indices_interchange.<locals>.MockDataFrame.__getattr__N)__name__
__module____qualname__r   r   r9   r9   r9   r:   r     s    r   r   Nr   r   r"   r#   r!   r@   r   rO   not_a_columnzkey.step must be 1 or None)r*   r+   r,   rW   r
   rR   rS   NotImplementedError)r   r/   r   	df_mockedkey_resultsr1   resultmsgr9   r9   r:   #test_get_column_indices_interchange  s0   "r   c                   C   s   t  d u sJ tt t dgddg W d    n1 sw   Y  tt t ddgddgddd W d    n1 s@w   Y  tt ddgdddksSJ d S )	Nr   r   Fr   )replace	n_samplesr   r   )r   )r   r*   rR   rS   r   r9   r9   r9   r:   test_resample  s    r   c                  C   s   t jd} d}d}| j|dfd}| jd||d}t||ddd d\}}t |dks.J t||dd|d\}}t |dkrBJ t |dksKJ d S )	Nr   d   g?r   sizer   r   random_statestratifyr   )r   r   r   normalbinomialr   allsum)r   r   pr   y_y_not_stratifiedy_stratifiedr9   r9   r:   test_resample_stratified  s   r   c                  C   s   t jd} d}| j|dfd}| jdd|d}t||dd| |d\}}t||d	d| |d\}}t |jd dk s<J t |jd dksHJ t||dd
| |d\}}|jd d
ks]J t |jd dksiJ d S )Nr   r   r   r   r   T2   )r   r   r   r   Fi  )r   r   r   r   randintr   uniquer   )r   r   r   r   	X_replacer   X_no_replacer9   r9   r:    test_resample_stratified_replace  s"   


r   c                  C   s\   t jd} d}| j|dfd}| jdd|dfd}t||d| |d\}}|jdks,J d S )Nr   r   r   r   r   r   r   )r   r   r   r   r   r   ndim)r   r   r   r   r9   r9   r:   test_resample_stratify_2dy  s   r   csr_containerc                 C   s   t jd}d}|j|dfd}|jdd|d}| |dd}tjtdd t	||d	||d
\}}W d    d S 1 s>w   Y  d S )Nr   r   r   r   r   zSparse data was passedrO   r   r   )
r   r   r   r   r   reshaper*   rR   rV   r   )r   r   r   r   r   r   r9   r9   r:   #test_resample_stratify_sparse_error%  s   "r   c                  C   sZ   dd } t ddgddggddgd	d
ggg}t| |}t| t| ||ks+J d S )Nc                 S   s   t dd | D S )Nc                 s   s"    | ]}t d d |D V  qdS )c                 s   s    | ]}t |V  qd S rL   rl   ).0Cr9   r9   r:   	<genexpr>3  s    zPtest_shuffle_on_ndim_equals_three.<locals>.to_tuple.<locals>.<genexpr>.<genexpr>Nr   )r   Br9   r9   r:   r  3  s     zFtest_shuffle_on_ndim_equals_three.<locals>.to_tuple.<locals>.<genexpr>r   )Ar9   r9   r:   to_tuple2  s   z3test_shuffle_on_ndim_equals_three.<locals>.to_tupler   r   r   r   r   r   rp   r   )r   rh   setr   )r  r  Sr9   r9   r:   !test_shuffle_on_ndim_equals_three1  s
   &r  csc_containerc                 C   s6  g d}t jg dtd}g d}tt jddgddgdd	ggtd}| t d
dd	}t|||||dd\}}}}	}
|g dksEJ t|tksMJ t	|g d |j
tks[J |g dkscJ t|tkskJ t	|	t jdd	gddgddggtd t|	tksJ t	|
 t ddgd	dgddgg d S )Nr    rD   rZ   r!   r   r"   r   r#   r   r   r   )r   )r#   r"   r!   )r   r   r   r   r   )r   rh   objectr   aranger   r   typerg   r   rE   toarray)r  r!   r"   r#   dea_sb_sc_sd_se_sr9   r9   r:   "test_shuffle_dont_convert_to_array;  s   $&*r  )Lr   r   unittestr   numpyr   r*   r[   $sklearn.externals._packaging.versionr   r   sklearn.utilsr   r   r   sklearn.utils._array_apir   sklearn.utils._indexingr	   r
   r   sklearn.utils._mockingr   sklearn.utils._testingr   r   r   r   r   sklearn.utils.fixesr   r   r
  r   r   r;   markparametrizebool_rW   rh   int32int64uint8r	  rM   rT   rX   re   rv   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r9   r9   r9   r:   <module>   s    !





.,


	


(

!

(


