o
    `^hK                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlZddlZddlZddlZddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZ ddl m!Z!m"Z"m#Z# dZ$dZ%G dd dZ&eeddZdd Z'ej()ddddidddfdddddddfdddidddfdddddddfd dd id!d"dfd d#ddd!d"dfd$dd$id%d&d'fd(dd(id)d*dfd(d+d,id)d*dfd-dd-id"d.dfd-d+d/id"d.dfd0dd0id1d%dfgej()d2d3d4gej()d5dd6gd7d8 Z*ej()ddddidddfdddddddfdddidddfdddddddfd dd id!d"dfd d#ddd!d"dfd$dd$id%d&d'fd(dd(id)d*dfd(d+d,id)d*dfd-dd-id"d.dfd-d+d/id"d.dfgej()d2d3d4gd9d: Z+ej()dg d;d<d= Z,ej()d2d3d4gd>d? Z-ej()d2d3d4gd@dA Z.ej()d2d3d4gej()dBdCdCdDggdEdF Z/ej()dg dGej()d2d3d4gdHdI Z0ej()dg dJej()d2d3d4gdKdL Z1dMdN Z2ej3dOdPdQdR Z4ej3dOdPdSdT Z5ej()dUg dVej()d5dd6gdWdX Z6ej()dYd2dZid[fd\dZid]fgd^d_ Z7ej()d`ddadbdadadbd6d4dbd6dadbgdcdd Z8ej(9deej()dYd2d4idffd\didgfd4ddhdgfgdidj Z:ej(9deej()dkdldmgdndo Z;dpdq Z<ej()d5dd6gdrds Z=ej()d5dd6gdtdu Z>ej()d5dd6gej()d2d3d4gdvdw Z?ej()d5dd6gej()dxddyiddzdd{gd|d} Z@ej()d5dd6gej()d~dyd+dzieAdfddddgdeAdfd0d0d6deAdfdddddeAdfdddd6deAdfddddeBdfddddgdeBdfgej()d2d3d4gdd ZCej()ddddd{eAdfdddeAdfdddd{eAdfi eAdfgdd ZDej()d5dd6gdd ZEej()d5dd6gdd ZFej()d5dd6gdd ZGdd ZHej()d5dd6gdd ZIej()ddd6gdd ZJdd ZKdd ZLej()d5dd6gdd ZMej()dg ddd ZNdd ZOej()d5dd6gej()d2ddd ZPdd ZQdd ZRdd ZSdS )zTest the openml loader.    N)partial)	resources)BytesIO)	HTTPError)config_context)fetch_openml)_OPENML_PREFIX_get_local_path_open_openml_url_retry_with_clean_cache)Bunch)check_pandas_support)SkipTestassert_allcloseassert_array_equalz"sklearn.datasets.tests.data.openmlTc                   @   sF   e Zd Zdd ZdddZdd Zdd	 Zd
d Zdd Zdd Z	dS )_MockHTTPResponsec                 C   s   || _ || _d S N)datais_gzip)selfr   r    r   `/home/air/shanriGPT/back/venv/lib/python3.10/site-packages/sklearn/datasets/tests/test_openml.py__init__'   s   
z_MockHTTPResponse.__init__c                 C   s   | j |S r   )r   read)r   amtr   r   r   r   +   s   z_MockHTTPResponse.readc                 C   s   | j   d S r   )r   closer   r   r   r   r   .   s   z_MockHTTPResponse.closec                 C   s   | j rddiS i S )NzContent-Encodinggzipr   r   r   r   r   info1   s   z_MockHTTPResponse.infoc                 C   s
   t | jS r   )iterr   r   r   r   r   __iter__6   s   
z_MockHTTPResponse.__iter__c                 C   s   | S r   r   r   r   r   r   	__enter__9      z_MockHTTPResponse.__enter__c                 C   s   dS )NFr   )r   exc_typeexc_valexc_tbr   r   r   __exit__<   r$   z_MockHTTPResponse.__exit__N)r   )
__name__
__module____qualname__r   r   r   r    r"   r#   r(   r   r   r   r   r   &   s    
r   )	data_homec                    s   d
ddddt j	td d|  fdd	  	fd
d
fddfddfdd 	fdd
fdd}tr]| tjjd| d S d S )Nz(https://api.openml.org/api/v1/json/data/z1https://api.openml.org/api/v1/json/data/features/zhttps://api.openml.org/data/v1/z-https://api.openml.org/api/v1/json/data/list/z.gz.id_c                    s~   t dd| tdd  |   }|dddddd	d
dddddddddddddddS )Nz\W-zhttps://api.openml.org/z-json-data-listz-jdlz-json-data-featuresz-jdfz-json-data-qualitiesz-jdqz
-json-dataz-jdz
-data_namez-dnz	-downloadz-dlz-limitz-lz-data_versionz-dvz-statusz-sz-deactivatedz-dactz-activez-act)resublenreplace)urlsuffixoutput)path_suffixr   r   
_file_nameU   s$   
z4_monkey_patch_webbased_functions.<locals>._file_namec           	         s   |  |sJ  | |}t| }|d.}|r/r/t| }t|dW  d    S |d}t| }t|dW  d    S 1 sIw   Y  d S )NrbTF)
startswithr   filesopenr   r   r   )	r4   has_gzip_headerexpected_prefixr5   data_file_namedata_file_pathffpdecompressed_f)r8   data_modulegzip_responseread_fnr   r   _mock_urlopen_sharedk   s   

$z>_monkey_patch_webbased_functions.<locals>._mock_urlopen_sharedc                        | |ddS N.jsonr4   r=   r>   r5   r   r4   r=   )rG   url_prefix_data_descriptionr   r   _mock_urlopen_data_descriptionz      zH_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_descriptionc                    rH   rI   r   rL   )rG   url_prefix_data_featuresr   r   _mock_urlopen_data_features   rO   zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_featuresc                    rH   )Nz.arffrK   r   rL   )rG   url_prefix_download_datar   r   _mock_urlopen_download_data   rO   zE_monkey_patch_webbased_functions.<locals>._mock_urlopen_download_datac           	         s  |  sJ  | d}t| }|d}|d}| d}t|}W d    n1 s4w   Y  d|v rGtd ddd t	 d|d,}|rat	| }t
|dW  d    S |d}t	| }t
|d	W  d    S 1 s{w   Y  d S )
NrJ   r9   zutf-8error  Simulated mock errorr4   codemsghdrsrB   TF)r:   r   r;   r<   r   decodejsonloadsr   r   r   )	r4   r=   r?   r@   rA   rC   	decoded_s	json_datarB   )r8   rD   rF   url_prefix_data_listr   r   _mock_urlopen_data_list   s*   


$zA_monkey_patch_webbased_functions.<locals>._mock_urlopen_data_listc                    sr   |   }| ddk}|r||S |r||S |r)||S |r3 ||S td| )NzAccept-encodingr   zUnknown mocking URL pattern: %s)get_full_url
get_headerr:   
ValueError)requestargskwargsr4   r=   )rN   rQ   ra   rS   rM   rP   r`   rR   r   r   _mock_urlopen   s   







z7_monkey_patch_webbased_functions.<locals>._mock_urlopenurlopen)r   r<   OPENML_TEST_DATA_MODULEtest_offlinesetattrsklearndatasets_openml)contextdata_idrE   rh   r   )r8   rN   rQ   ra   rS   rG   rD   rE   r7   rF   rM   rP   r`   rR   r    _monkey_patch_webbased_functionsG   s"   rr   z9data_id, dataset_params, n_samples, n_features, n_targets=   rq            iris)nameversion      &   anneal1        cpu鍞     H      _  
      rx   zadult-census  M   MiceProtein  i  parser	liac-arffpandasrE   Fc           
      C   s  t d}t| ||d td	dd|d|}	t|	jd |ks"J t|	ts)J t|	j|j	s2J |	jj
||| fks>J t|	j|j	sGJ |	jj
||fksQJ |dkrht|	j|js^J |	jj
|fksgJ nt|	j|j	sqJ |	jj
||fks{J |	jdu sJ dS )
zCheck the behaviour of `fetch_openml` with `as_frame=True`.

    Fetch by ID and/or name (depending if the file was previously cached).
    r   rE   TFas_framecacher   idrv   Nr   )pytestimportorskiprr   r   intdetails
isinstancer   frame	DataFrameshaper   targetSeries
categories)
monkeypatchrq   dataset_params	n_samples
n_features	n_targetsr   rE   pdbunchr   r   r   test_fetch_openml_as_frame_true   s*   
(r   c                 C   s   t d t| |dd td	dd|d|}t|jd |ks"J t|ts)J |jdu s0J t|j	t
js9J |j	j||fksCJ t|jt
jsLJ |dkrZ|jj|fksYJ n
|jj||fksdJ t|jtslJ dS )
znCheck the behaviour of `fetch_openml` with `as_frame=False`.

    Fetch both by ID and/or name + version.
    r   Tr   Fr   r   Nrv   r   )r   r   rr   r   r   r   r   r   r   r   npndarrayr   r   r   dict)r   rq   r   r   r   r   r   r   r   r   r    test_fetch_openml_as_frame_false  s&   
$r   )rs   r   r   c           
         s   t dt| |dd t|dddd}t|dddd}|j|j}  fdd}||}j|  |j|j}j|j	   fd	d
}||}	j|	 dS )z:Check the consistency of the LIAC-ARFF and pandas parsers.r   Tr   Fr   rq   r   r   r   c                    s(    | j  }jj|r| |jS | S r   )rx   apitypesis_numeric_dtypeastypedtypeseriespandas_series)data_pandasr   r   r   convert_numerical_dtypes]  s   
zFtest_fetch_openml_consistency_parser.<locals>.convert_numerical_dtypesc                    sF    | j  }jj|r| |jS t|jjr!| j	|jj
S | S r   )rx   r   r   r   r   r   r   CategoricalDtypecatrename_categoriesr   r   )frame_pandasr   r   r   (convert_numerical_and_categorical_dtypesq  s   
zVtest_fetch_openml_consistency_parser.<locals>.convert_numerical_and_categorical_dtypesN)
r   r   rr   r   r   applytestingassert_frame_equalr   feature_names)
r   rq   
bunch_liacbunch_pandas	data_liacr   data_liac_with_fixed_dtypes
frame_liacr   frame_liac_with_fixed_dtypesr   )r   r   r   r   $test_fetch_openml_consistency_parserE  s2   


r   c                 C   s\   t d d}t| |dd t|dd|d}t|dd|d}t|j|j t|j|j dS )z^Check the equivalence of the dataset when using `as_frame=False` and
    `as_frame=True`.
    r   rs   Tr   Fr   N)r   r   rr   r   r   r   r   r   )r   r   rq   bunch_as_frame_truebunch_as_frame_falser   r   r   -test_fetch_openml_equivalence_array_dataframe  s"   
r   c                 C   sn  t d}|jjj}d}d}d}d}|g d}tjgd }	g d}
d	}t| |d
 t|d
d|d}|j	}|j
}|j}t||jsCJ t|j|	ksMJ |j|ksTJ t|j|
ks^J t|j|
kshJ |j|gkspJ t||jsxJ |j|ksJ |j|ksJ |j|ksJ |jjsJ t||jsJ |j|ksJ t|j|	|g ksJ |jjsJ dS )z>Check fetching on a numerical only dataset with string labels.r   rs   rt   ru   )rt   )rt      )zIris-setosazIris-versicolorzIris-virginicaru   )sepallength
sepalwidthpetallength
petalwidthclassTFr   N)r   r   r   r   r   r   float64rr   r   r   r   r   r   r   alldtypesr   columnsr   target_namesr   r   rx   index	is_unique)r   r   r   r   rq   
data_shapetarget_shapeframe_shapetarget_dtypedata_dtypes
data_namestarget_namer   r   r   r   r   r   r   test_fetch_openml_iris_pandas  sJ   

r   target_columnr   r   c                 C   s   t d}d}t| |d t|dd||d}t|dd|d}|j|j|j t|trB|j	|j
j|| |jjdks@J d	S |j
j|ksJJ |jjdksRJ d	S )
z@Check that we can force the target to not be the default target.r   rs   TF)rq   r   r   r   r   r   )rt      r   N)r   r   rr   r   r   r   r   r   listassert_index_equalr   r   Indexr   r   rx   )r   r   r   r   rq   bunch_forcing_targetbunch_defaultr   r   r   !test_fetch_openml_forcing_targets  s0   

r   )rs   rz   r~   r   r   c                 C   s   t d}t| |dd t|ddd|d}t|ddd|d\}}|j|j| t||jr8|j	|j
| dS |j|j
| dS )z>Check the behaviour of `return_X_y=True` when `as_frame=True`.r   Tr   Frq   r   r   
return_X_yr   N)r   r   rr   r   r   r   r   r   r   assert_series_equalr   )r   rq   r   r   r   Xyr   r   r   .test_fetch_openml_equivalence_frame_return_X_y  s(   

r   )rs   r~   r   r   c                 C   s\   t d t| |dd t|ddd|d}t|ddd|d\}}t|j| t|j| dS )z?Check the behaviour of `return_X_y=True` when `as_frame=False`.r   Tr   Fr   N)r   r   rr   r   r   r   r   )r   rq   r   r   r   r   r   r   r   .test_fetch_openml_equivalence_array_return_X_y  s$   

r   c                 C   sf   t d d}t| |dd d}t||ddd}t||ddd}|jjjdks)J |jjd	ks1J d
S )z9Check the difference between liac-arff and pandas parser.r   r   Tr   Fr   r   rA   ON)r   r   rr   r   r   r   kind)r   rq   r   bunch_liac_arffr   r   r   r   $test_fetch_openml_difference_parsers(  s$   
r   module)scopec                   C   s0   g dg dg dg dg dg dg ddS )	z+Returns the columns names for each dataset.)r   r   r   r   r   )'familyzproduct-typesteelcarbonhardnesstemper_rolling	conditionformabilitystrength
non-ageingsurface-finishzsurface-qualityenamelabilitybcbfbtbw%2Fmeblmchromphoscbondmarviexptlferrocorrblue%2Fbright%2Fvarn%2Fcleanlustrejurofmspr   thickwidthr2   oilborepackingr   )vendorMYCTMMINMMAXCACHCHMINCHMAXr   )N Mean_Acc1298_Mean_Mem40_CentroidMean_Acc1298_Mean_Mem40_RolloffMean_Acc1298_Mean_Mem40_FluxMean_Acc1298_Mean_Mem40_MFCC_0Mean_Acc1298_Mean_Mem40_MFCC_1Mean_Acc1298_Mean_Mem40_MFCC_2Mean_Acc1298_Mean_Mem40_MFCC_3Mean_Acc1298_Mean_Mem40_MFCC_4Mean_Acc1298_Mean_Mem40_MFCC_5Mean_Acc1298_Mean_Mem40_MFCC_6Mean_Acc1298_Mean_Mem40_MFCC_7Mean_Acc1298_Mean_Mem40_MFCC_8Mean_Acc1298_Mean_Mem40_MFCC_9Mean_Acc1298_Mean_Mem40_MFCC_10Mean_Acc1298_Mean_Mem40_MFCC_11Mean_Acc1298_Mean_Mem40_MFCC_12Mean_Acc1298_Std_Mem40_CentroidMean_Acc1298_Std_Mem40_RolloffMean_Acc1298_Std_Mem40_FluxMean_Acc1298_Std_Mem40_MFCC_0Mean_Acc1298_Std_Mem40_MFCC_1Mean_Acc1298_Std_Mem40_MFCC_2Mean_Acc1298_Std_Mem40_MFCC_3Mean_Acc1298_Std_Mem40_MFCC_4Mean_Acc1298_Std_Mem40_MFCC_5Mean_Acc1298_Std_Mem40_MFCC_6Mean_Acc1298_Std_Mem40_MFCC_7Mean_Acc1298_Std_Mem40_MFCC_8Mean_Acc1298_Std_Mem40_MFCC_9Mean_Acc1298_Std_Mem40_MFCC_10Mean_Acc1298_Std_Mem40_MFCC_11Mean_Acc1298_Std_Mem40_MFCC_12Std_Acc1298_Mean_Mem40_CentroidStd_Acc1298_Mean_Mem40_RolloffStd_Acc1298_Mean_Mem40_FluxStd_Acc1298_Mean_Mem40_MFCC_0Std_Acc1298_Mean_Mem40_MFCC_1Std_Acc1298_Mean_Mem40_MFCC_2Std_Acc1298_Mean_Mem40_MFCC_3Std_Acc1298_Mean_Mem40_MFCC_4Std_Acc1298_Mean_Mem40_MFCC_5Std_Acc1298_Mean_Mem40_MFCC_6Std_Acc1298_Mean_Mem40_MFCC_7Std_Acc1298_Mean_Mem40_MFCC_8Std_Acc1298_Mean_Mem40_MFCC_9Std_Acc1298_Mean_Mem40_MFCC_10Std_Acc1298_Mean_Mem40_MFCC_11Std_Acc1298_Mean_Mem40_MFCC_12Std_Acc1298_Std_Mem40_CentroidStd_Acc1298_Std_Mem40_RolloffStd_Acc1298_Std_Mem40_FluxStd_Acc1298_Std_Mem40_MFCC_0Std_Acc1298_Std_Mem40_MFCC_1Std_Acc1298_Std_Mem40_MFCC_2Std_Acc1298_Std_Mem40_MFCC_3Std_Acc1298_Std_Mem40_MFCC_4Std_Acc1298_Std_Mem40_MFCC_5Std_Acc1298_Std_Mem40_MFCC_6Std_Acc1298_Std_Mem40_MFCC_7Std_Acc1298_Std_Mem40_MFCC_8Std_Acc1298_Std_Mem40_MFCC_9Std_Acc1298_Std_Mem40_MFCC_10Std_Acc1298_Std_Mem40_MFCC_11Std_Acc1298_Std_Mem40_MFCC_12BH_LowPeakAmpBH_LowPeakBPMBH_HighPeakAmpBH_HighPeakBPMBH_HighLowRatioBHSUM1BHSUM2BHSUM3zamazed.suprisedzhappy.pleasedzrelaxing.calmzquiet.stillz
sad.lonelyzangry.aggresive)age	workclasszfnlwgt:z
education:zeducation-num:zmarital-status:zoccupation:zrelationship:zrace:zsex:zcapital-gain:zcapital-loss:zhours-per-week:znative-country:r   )NDYRK1A_NITSN1_NBDNF_NNR1_NNR2A_NpAKT_NpBRAF_N	pCAMKII_NpCREB_NpELK_NpERK_NpJNK_NPKCA_NpMEK_NpNR1_NpNR2A_NpNR2B_NpPKCAB_NpRSK_NAKT_NBRAF_NCAMKII_NCREB_NELK_NERK_NGSK3B_NJNK_NMEK_NTRKA_NRSK_NAPP_N
Bcatenin_NSOD1_NMTOR_NP38_NpMTOR_NDSCR1_NAMPKA_NNR2B_NpNUMB_NRAPTOR_NTIAM1_NpP70S6_NNUMB_NP70S6_NpGSK3B_NpPKCG_NCDK5_NS6_NADARB1_NAcetylH3K9_NRRP1_NBAX_NARC_NERBB4_NnNOS_NTau_NGFAP_NGluR3_NGluR4_NIL1B_NP3525_NpCASP9_NPSD95_NSNCA_NUbiquitin_NpGSK3B_Tyr216_NSHH_NBAD_NBCL2_NpS6_NpCFOS_NSYP_N	H3AcK18_NEGR1_NH3MeK4_NCaNA_Nr   )pclasssurvivedrx   sexrf  sibspparchticketfarecabinembarkedboatbody	home.destrs   rz   r~   r   r   r   r   r   r   r   r   r   datasets_column_namesG  s   )PP r  c                   C   s   i i ddddddddd	d
dddddddd
dddddddddddddd
ddddddddddd
dd
i i i ddiddddddd d!d"S )#Nr   r{   r   	   r   rz   r   ru   r   r   r   r   r   r  r  r     r  r  r  r  r  r	  )
r
  r  r  r  r  r  r  r  r  r  r  r   i  rv   i  i7  i  i4  )rf  r  r  r  r  r  r  r  r   r   r   r   r   datasets_missing_values:  sx   	
r  zJdata_id, parser, expected_n_categories, expected_n_floats, expected_n_ints))rs   r   rv   ru   r   )rs   r   rv   ru   r   )rz   r   !   r   r   )rz   r   r  rz   ru   )r~   r   rv   r   r   )r~   r   rv   r   r   )r   r   r   r   r   )r   r   r   E   r   )r   r   r  r   r   )r   r   r  r   r   )r   r   rv   r   r   )r   r   rv   r   r   )r   r   r   r   r   )r   r   r   r   r   c	                    s   t d}	|	jjj t| ||d t|dd|d}
|
j}t fdd|j	D }tdd |j	D }td	d |j	D }||ksBJ ||ksHJ ||ksNJ |j
 || ksYJ |   }| D ]\}}|| |d
}||kswJ qedS )zYCheck that `fetch_openml` infer the right number of categories, integers, and
    floats.r   r   TFr   c                    s   g | ]	}t | r|qS r   )r   .0r   r   r   r   
<listcomp>      z5test_fetch_openml_types_inference.<locals>.<listcomp>c                 S      g | ]	}|j d kr|qS )rA   r   r  r   r   r   r    r  c                 S   r  )ir  r  r   r   r   r    r  r   N)r   r   r   r   r   rr   r   r   r2   r   r   tolistisnasumto_dictitemsget)r   rq   r   expected_n_categoriesexpected_n_floatsexpected_n_intsrE   r  r  r   r   r   n_categoriesn_floatsn_intsframe_feature_to_n_nanrx   	n_missingexpected_missingr   r  r   !test_fetch_openml_types_inferencek  s0   
(
r  zparams, err_msgunknownz:The 'parser' parameter of fetch_openml must be a str amongr   z<The 'as_frame' parameter of fetch_openml must be an instancec                 C   sV   d}t | |d tjt|d tdd|i| W d    d S 1 s$w   Y  d S )Nr   Tmatchrq   r   )rr   r   raisesrd   r   r   paramserr_msgrq   r   r   r   &test_fetch_openml_validation_parameter  s
   "r  r  auto)r   r   c                 C   s   d}z	t d W td ty?   t| |d d}tjt|d td	d|i| W d   Y dS 1 s7w   Y  Y dS w )
z=Check that we raise the proper errors when we require pandas.r   !test_fetch_openml_requires_pandasTz:requires pandas to be installed. Alternatively, explicitlyr  rq   Nz.This test requires pandas to not be installed.r   )r   ImportErrorrr   r   r  r   r   )r   r  rq   r  r   r   r   'test_fetch_openml_requires_pandas_error  s   
&r  z2ignore:Version 1 of dataset Australian is inactivez:Sparse ARFF datasets cannot be loaded with parser='pandas'z9Sparse ARFF datasets cannot be loaded with as_frame=True.)r   r   c                 C   sb   t d d}t| |d t jt|d td|dd| W d   dS 1 s*w   Y  dS )	ztCheck that we raise the expected error for sparse ARFF datasets and
    a wrong set of incompatible parameters.
    r   $  Tr  F)rq   r   Nr   )r   r   rr   r  rd   r   r  r   r   r   #test_fetch_openml_sparse_arff_error  s   
"r  zdata_id, data_type)rs   	dataframe)r  sparsec                 C   sN   t d}t| |d t|ddd}|dkr|jntjj}t|j	|s%J dS )z&Check the auto mode of `fetch_openml`.r   Tr  F)rq   r   r   r  N)
r   r   rr   r   r   scipyr  
csr_matrixr   r   )r   rq   	data_typer   r   klassr   r   r   test_fetch_openml_auto_mode  s
   

r  c              	   C   s   t d d}t| |d d}t jt|d. tdd t|ddd	d
 W d   n1 s/w   Y  W d   dS W d   dS 1 sGw   Y  dS )z[Check that we raise a warning regarding the working memory when using
    LIAC-ARFF parser.r   r   Tz*Could not adhere to working_memory config.r  gư>)working_memoryFr   r   N)r   r   rr   warnsUserWarningr   r   )r   rq   rY   r   r   r   :test_convert_arff_data_dataframe_warning_low_memory_pandas  s    
"r  c                 C   sb   d}d}t | || td}tjt|d t|dddd W d   dS 1 s*w   Y  dS )	z\Check that a warning is raised when multiple versions exist and no version is
    requested.rs   rw   a;  Multiple active versions of the dataset matching the name iris exist. Versions may be fundamentally different, returning version 1. Available versions:
- version 1, status: active
  url: https://www.openml.org/search?type=data&id=61
- version 3, status: active
  url: https://www.openml.org/search?type=data&id=969
r  Fr   )rx   r   r   r   N)rr   r0   escaper   r  r  r   )r   rE   rq   	data_namerY   r   r   r   ,test_fetch_openml_iris_warn_multiple_version(  s   	"r  c                 C   sT   d}d}d}d}t | || t||dddd}|jj||fks!J |jdu s(J dS )z/Check that we can get a dataset without target.rs   Nrt   r   Fr   rq   r   r   r   r   )rr   r   r   r   r   )r   rE   rq   r   expected_observationsexpected_featuresr   r   r   r   test_fetch_openml_no_targetC  s   r  c                 C   sb   t d d}t| ||d t|dd|d}|jjd }|jd   s'J t|j	g d d	S )
zRcheck that missing values in categories are compatible with pandas
    categoricalr   iY  r   FTrq   r   r   r   r  )FEMALEMALE_N)
r   r   rr   r   r   r   r  anyr   r   )r   rE   r   rq   penguins	cat_dtyper   r   r   test_missing_values_pandasW  s   
r  r     glass2)rq   rx   ry   c                 C   s~   d}t | || d}tjt|d tddddd|}W d   n1 s'w   Y  |jjdks4J |jd	 d
ks=J dS )z;Check that we raise a warning when the dataset is inactive.r  z(Version 1 of dataset glass2 is inactive,r  Fr   )r   r   r   N)   r  r   40675r   )rr   r   r  r  r   r   r   r   )r   rE   r   rq   rY   r	  r   r   r   test_fetch_openml_inactivem  s   
r  z"data_id, params, err_type, err_msgzNo active dataset glass2 foundr   r   )rq   r   z1Can only handle homogeneous multi-target datasets)rq   r   zOSTRING attributes are not supported for array representation. Try as_frame=Truer   )rq   r   r   zTarget column 'family'	undefinedz(Could not find target_column='undefined'c                 C   sr   t | || |dds|dkrtd tj||d tdd|d| W d    d S 1 s2w   Y  d S )Nr   Tr   r  F)r   r   r   )rr   r  r   r   r  r   )r   rE   rq   r  err_typer  r   r   r   r   test_fetch_openml_error  s   2
"r  zparams, err_type, err_msgr   ry   zCThe 'version' parameter of fetch_openml must be an int in the rangenAmE)rq   rx   zCThe 'data_id' parameter of fetch_openml must be an int in the rangez6The 'version' parameter of fetch_openml must be an intzFNeither name nor data_id are provided. Please provide name or data_id.c                 C   sB   t j||d tdi |  W d    d S 1 sw   Y  d S )Nr  r   )r   r  r   )r  r  r  r   r   r   )test_fetch_openml_raises_illegal_argument  s   "r  c                 C   s^  d}d}d}t | || d}||}tjt|d t||dddd W d    n1 s.w   Y  d	}||}tjt|d t||dddd W d    n1 sUw   Y  d}||}tjt|d t||d
gdddd W d    n1 s~w   Y  d	}||}tjt|d t||d
gdddd W d    d S 1 sw   Y  d S )Nr   z.target_column='{}' has flag is_row_identifier.z&target_column='{}' has flag is_ignore.MouseIDr  Fr   r  Genotyper   )rr   formatr   r  r  r   )r   rE   rq   expected_row_id_msgexpected_ignore_msg
target_colrY   r   r   r   test_warn_ignore_attribute  s`   

	

"r  c                 C   X   d}t | || d}tjt|d t|dddd W d    d S 1 s%w   Y  d S )Nrv   zJOpenML registered a problem with the dataset. It might be unusable. Error:r  Fr   r   rr   r   r  r  r   r   rE   rq   rY   r   r   r   test_dataset_with_openml_error     "r  c                 C   r  )Nr   zFOpenML raised a warning on the dataset. It might be unusable. Warning:r  Fr   r   r  r  r   r   r    test_dataset_with_openml_warning  r  r  c                 C   s   t d d}t| |dd |dddd}tdi |}tdi |dddii}td	d
 |jd jjD s9J tdd
 |jd jjD rIJ dS )zACheck that we can overwrite the default parameters of `read_csv`.r   6  Frq   rE   Tr   read_csv_kwargsskipinitialspacec                 s       | ]}| d V  qdS  Nr:   r  r   r   r   r   	<genexpr>0  s    

zFtest_fetch_openml_overwrite_default_params_read_csv.<locals>.<genexpr>r   c                 s   r#  r$  r&  r'  r   r   r   r(  3  s
    
Nr   )	r   r   rr   r   r   r   r   r   r  )r   rq   common_paramsadult_without_spacesadult_with_spacesr   r   r   3test_fetch_openml_overwrite_default_params_read_csv  s(   
	r,  c           	      C   st   d}t | || tjjj|}t|d}t||}t	||}t
j|s)J t||}| | ks8J d S )Nrs   scikit_learn_data)rr   rm   rn   ro   
_DATA_FILEr  strmkdirr
   r	   ospathisfiler   )	r   rE   tmpdirrq   openml_pathcache_directory	response1location	response2r   r   r   test_open_openml_url_cache=  s   


r:  write_to_diskc                    s   d}t jjj|}t|d}t||  fdd}| t jjd| t	j
tdd t|| W d    n1 s=w   Y  tj rJJ d S )Nrs   r-  c                    sF   rt  d}|d W d    td1 sw   Y  td)Nw Invalid request)r<   writerd   )re   rf   rg   rA   r8  r;  r   r   rh   U  s   
z>test_open_openml_url_unlinks_local_path.<locals>._mock_urlopenri   r>  r  )rm   rn   ro   r.  r  r/  r0  r	   rl   r   r  rd   r
   r1  r2  exists)r   r4  r;  rq   r5  r6  rh   r   r@  r   'test_open_openml_url_unlinks_local_pathN  s   
rB  c                    s   d}t jjj|}t| d}t|| t	tj
  t d}|d W d    n1 s4w   Y  t|| fdd}d}tjt|d | }W d    n1 s[w   Y  |d	ksfJ d S )
Nrs   r-  r<  r=  c                      s   t j r
tddS )NzFile exist!rv   )r1  r2  rA  	Exceptionr   r8  r   r   
_load_datam  s   z/test_retry_with_clean_cache.<locals>._load_dataz!Invalid cache, redownloading filer  rv   )rm   rn   ro   r.  r  r/  r0  r	   r1  makedirsr2  dirnamer<   r?  r   r   r  RuntimeWarning)r4  rq   r5  r6  rA   rE  warn_msgresultr   rD  r   test_retry_with_clean_cachec  s   
rK  c                 C   sr   d}t jjj|}t| d}t||dd }d}tj	t
|d |  W d    d S 1 s2w   Y  d S )Nrs   r-  c                   S      t d ddd t d)NrU   rV   rW   r   r   r   r   r   r   rE    s   z:test_retry_with_clean_cache_http_error.<locals>._load_datarV   r  )rm   rn   ro   r.  r  r/  r0  r   r   r  r   )r4  rq   r5  r6  rE  	error_msgr   r   r   &test_retry_with_clean_cache_http_errorz  s   
"rO  c           
      C   s   dd }d}t |d}t| || t|d|dddd\}}| tjjd	| t|d|dddd\}}	tj	
|| tj	
||	 d S )
Nc                 _   s   t d|   )NzhThis mechanism intends to test correct cachehandling. As such, urlopen should never be accessed. URL: %s)rd   rb   re   rf   rg   r   r   r   _mock_urlopen_raise  s
   z4test_fetch_openml_cache.<locals>._mock_urlopen_raisers   r-  TFr   )rq   r   r,   r   r   r   ri   )r/  r0  rr   r   rl   rm   rn   ro   r   r   r   )
r   rE   r4  rQ  rq   r6  	X_fetched	y_fetchedX_cachedy_cachedr   r   r   test_fetch_openml_cache  s.   
	
rV  zas_frame, parser))Tr   )Fr   )Tr   )Fr   c                    sT  |s|dkrt d d}t| |d td d|  }d}t|| }|d  |d}	t|	d}
t|
	 }d	|t
|d
 < W d   n1 sMw   Y  t d}|| W d   n1 shw   Y  tjjj fdd}| tjjd| t t}tjj|d||d W d   n1 sw   Y  |dsJ dS )z/Check that the checksum is working as expected.r   rz   Tr-   r.   zdata-v1-dl-1666876.arff.gzztest_invalid_checksum.arffr9   %   rv   Nwbc                    s\   |   }|dr*t d}| }W d    n1 sw   Y  tt|ddS | S )Nzdata/v1/download/1666876r9   Tr   )rb   endswithr<   r   r   r   )re   rf   rg   r4   rA   corrupted_datacorrupt_copy_pathmocked_openml_urlr   r   swap_file_mock  s   

z9test_fetch_openml_verify_checksum.<locals>.swap_file_mockri   Fr   1666876)r   r   rr   rj   r   r;   r<   r   	bytearrayr   r2   GzipFiler?  rm   rn   ro   ri   rl   r  rd   r   r  )r   r   r   r4  r   rq   original_data_moduleoriginal_data_file_nameoriginal_data_path	orig_file	orig_gzipr   modified_gzipr^  excr   r[  r   !test_fetch_openml_verify_checksum  s2   

	ri  c              	   C   s   dd }|  tjjd| d}tjttdt	|  dd/}tj
tdd t|d d	d
 W d    n1 s9w   Y  t|dksFJ W d    d S 1 sQw   Y  d S )Nc                 _   rL  )Ni  Simulated network errorrW   rM  rP  r   r   r   _mock_urlopen_network_error  s   zPtest_open_openml_url_retry_on_network_error.<locals>._mock_urlopen_network_errorri   zinvalid-urlz+A network error occurred while downloading z. Retrying...r  rj  r   )delayr   )rl   rm   rn   ro   r   r  r  r0   r  r   r  r   r
   r2   )r   rk  invalid_openml_urlrecordr   r   r   +test_open_openml_url_retry_on_network_error  s&   
"ro  )r   r   c                 C   sh   |dkr	t d d}t| || tjj|dd|d}|dus!J |d jdks*J d|d	 vs2J dS )
zCheck that we can load the "zoo" dataset.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/14340
    r   >   Fr   Nr   )e      animalr   )r   r   rr   rm   rn   r   r   )r   rE   r   rq   datasetr   r   r   &test_fetch_openml_with_ignored_feature   s   
ru  c                 C   s  t d}d}t| |dd dd|d}tdddi|}tdddi|}|j|j|j |jjd		 r:J |jj
d		 rEJ tddd
d|}tddd
d|}|j|jd |jd  |jd jd		 rsJ |jd j
d		 rJ dS )zCheck that we strip the single quotes when used as a string delimiter.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23381
    r   r   Fr   Tr   r   rq   r   r   'r  )r   r   r   Nr   )r   r   rr   r   r   r   r   r/  r:   r  rY  r   )r   r   rq   r)  mice_pandasmice_liac_arffr   r   r   test_fetch_openml_strip_quotes  s(   
rz  c                 C   sj   t d}d}t| |dd dd|d}tdddi|}tdddi|}|j|jd	 |jd	  d
S )zCheck that we can strip leading whitespace in pandas parser.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25311
    r   r  Fr   Trv  r   r   r   Nr   )r   r   rr   r   r   r   r   r   r   rq   r)  adult_pandasadult_liac_arffr   r   r   $test_fetch_openml_leading_whitespace3  s   
r~  c                 C   sb   t d}d}t| |dd dd|d}td
ddi|}td
ddi|}|j|j|j d	S )zCheck that we can handle escapechar and single/double quotechar.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/25478
    r   iZ  Fr   Trv  r   r   Nr   )r   r   rr   r   r   r   r   r{  r   r   r   &test_fetch_openml_quotechar_escapecharE  s   
r  )T__doc__r   r\   r1  r0   	functoolsr   	importlibr   ior   urllib.errorr   numpyr   r   scipy.sparser  rm   r   sklearn.datasetsr   fetch_openml_origsklearn.datasets._openmlr   r	   r
   r   sklearn.utilsr   $sklearn.utils._optional_dependenciesr   sklearn.utils._testingr   r   r   rj   rk   r   rr   markparametrizer   r   r   r   r   r   r   r   r   fixturer  r  r  r  r  filterwarningsr  r  r  r  r  r  r  rd   KeyErrorr  r  r  r  r  r,  r:  rB  rK  rO  rV  ri  ro  ru  rz  r~  r  r   r   r   r   <module>   s   {+'
?

/

 
s
00

	








-




1

 


"
	.