o
    ?HhY                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd d	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 G dd dZ2dd Z3ej4dddd Z5ej4dddd Z6ej4dd Z7ej4dd Z8ej9:ddee2gdd Z;dd  Z<d!d" Z=d#d$ Z>d%d& Z?ej9:d'd(gd(d)gfd*d+ Z@ej9:d,d-d.d/g d0fd1d2d3g d4fd5d6d7d8d9gfgd:d; ZAd<d= ZBej9:d>d?i d@dAgfdBi d@gfdCdDdEidFdGgfgdHdI ZCdJdK ZDdLdM ZEdNdO ZFdPdQ ZGej9:dRedSdTdUdVdWgfe%dXdYdZdVg fe!d[d\dZdVdWgfe"d]d]dZdVd^d_gfed`daddVg fedbdcdAdVg feedddedfdgdAdVg fgdhdi ZHej9:djeejIeJfeejIejIfeejIeJfe!ejIeJfe"ejIejIfe%ejIeJfgdkdl ZKdmdn ZLdodp ZMdqdr ZNdsdt ZOdudv ZPdwdx ZQdydz ZRd{d| ZSd}d~ ZTdd ZUdS )    N)partial)	resources)Path)dumpsloadsMock)	HTTPError)urlparse)clear_data_home
fetch_fileget_data_homeload_breast_cancerload_diabetesload_digits
load_files	load_irisload_linnerudload_sample_imageload_sample_images	load_wine)RemoteFileMetadata$_derive_folder_and_filename_from_url_fetch_remoteload_csv_dataload_gzip_compressed_csv_datacheck_as_frame)scale)Bunchc                   @   s    e Zd ZdZdd Zdd ZdS )
_DummyPathz8Minimal class that implements the os.PathLike interface.c                 C   s
   || _ d S Npath)selfr#    r%   `/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/sklearn/datasets/tests/test_base.py__init__0   s   
z_DummyPath.__init__c                 C   s   | j S r!   r"   )r$   r%   r%   r&   
__fspath__3   s   z_DummyPath.__fspath__N)__name__
__module____qualname____doc__r'   r(   r%   r%   r%   r&   r    -   s    r    c                 C   s   t j| rt|  d S d S r!   )osr#   isdirshutilrmtreer"   r%   r%   r&   _remove_dir7   s   r1   module)scopec                 c   "    t | d}|V  t| d S )Nscikit_learn_data_home_teststrmktempr1   tmpdir_factorytmp_filer%   r%   r&   	data_home<      r<   c                 c   r4   )Nscikit_learn_load_files_testr6   r9   r%   r%   r&   load_files_rootC   r=   r?   c                 c   sD    t j| d}t j|dd}|d |  t|V  t| d S )NdirF)rA   delete   Hello World!
)tempfilemkdtempNamedTemporaryFilewritecloser7   r1   )r?   test_category_dir1sample_filer%   r%   r&   test_category_dir_1J   s   

rK   c                 c   s$    t j| d}t|V  t| d S )Nr@   )rD   rE   r7   r1   )r?   test_category_dir2r%   r%   r&   test_category_dir_2T   s   
rM   path_containerc                 C   s~   | d ur| |}t |d}||ksJ tj|sJ | d ur#| |}t|d tj|r0J t |d}tj|s=J d S )N)r<   )r   r-   r#   existsr   )rN   r<   r%   r%   r&   test_data_home[   s   


rP   c                 C   s>   t | }t|jdksJ t|jdksJ |jd u sJ d S )Nr   )r   len	filenamestarget_namesDESCR)r?   resr%   r%   r&   test_default_empty_load_fileso   s   rV   c                 C   sN   t |}t|jdksJ t|jdksJ |jd u sJ |jdgks%J d S )N      rC   )r   rQ   rR   rS   rT   datarK   rM   r?   rU   r%   r%   r&   test_default_load_filesv   s
   r[   c                 C   sp   t j| t j }t|d|gdd}t|jdksJ t|j	dks'J |j
dks.J |jdgks6J d S )Ntestutf-8)description
categoriesencodingrW   zHello World!
)r-   r#   abspathsplitseppopr   rQ   rR   rS   rT   rY   )rK   rM   r?   categoryrU   r%   r%   r&   .test_load_files_w_categories_desc_and_encoding~   s   
rf   c                 C   sT   t |dd}t|jdksJ t|jdksJ |jd u sJ |dd u s(J d S )NF)load_contentrW   rX   rY   )r   rQ   rR   rS   rT   getrZ   r%   r%   r&   test_load_files_wo_load_content   s
   ri   allowed_extensionsz.txtz.jsonc                    sn   | d    d}fdd|D }|D ]}|d qt|  d}t fdd|D t|jks5J dS )	z;Check the behaviour of `allowed_extension` in `load_files`.sub)z	file1.txtz
file2.jsonz
file3.jsonzfile4.mdc                    s   g | ]} | qS r%   r%   .0f)dr%   r&   
<listcomp>   s    z6test_load_files_allowed_extensions.<locals>.<listcomp>s   hellorj   c                    s   g | ]}|j  v rt|qS r%   )suffixr7   )rm   prq   r%   r&   rp      s    N)mkdirwrite_bytesr   setrR   )tmp_pathrj   filespathsrs   rU   r%   )rj   ro   r&   "test_load_files_allowed_extensions   s   rz   zHfilename, expected_n_samples, expected_n_features, expected_target_nameszwine_data.csv      )class_0class_1class_2iris.csv      )setosa
versicolor	virginicazbreast_cancer.csv9     	malignantbenignc                 C   sV   t | \}}}|jd |ksJ |jd |ksJ |jd |ks"J tj|| d S )Nr   rW   )r   shapenptestingassert_array_equal)filenameexpected_n_samplesexpected_n_featuresexpected_target_namesactual_dataactual_targetactual_target_namesr%   r%   r&   test_load_csv_data   s
   r   c                  C   s   d} d}t | d}t | |d}t|dksJ t|dksJ tj|d |d  tj|d |d  tj|d	 |d	  |d
 dsIJ d S )Nr   ziris.rstdata_file_namer   descr_file_namer      r   rW   rX   z.. _iris_dataset:)r   rQ   r   r   r   
startswith)r   r   res_without_descrres_with_descrr%   r%   r&   test_load_csv_data_with_descr   s   
r   z filename, kwargs, expected_shapezdiabetes_data_raw.csv.gz  
   diabetes_target.csv.gzzdigits.csv.gz	delimiter,  A   c                 C   s&   t | fi |}|jt|ksJ d S r!   )r   r   tuple)r   kwargsexpected_shaper   r%   r%   r&   "test_load_gzip_compressed_csv_data   s   	r   c                  C   sB   d} d}t | d}t | |d\}}tj|| |dsJ d S )Nr   zdiabetes.rstr   r   z.. _diabetes_dataset:)r   r   r   r   r   )r   r   expected_datar   descrr%   r%   r&   -test_load_gzip_compressed_csv_data_with_descr   s   

r   c                  C   s   zTt  } t| jdksJ t| jdksJ | j}t|d ddd d f tjg dtjdks3J t|d ddd d f tjg dtjdksMJ | jsRJ W d S  t	yc   t
d Y d S w )NrX   r   )         )dtyperW   )rX      r|   3Could not load sample images, PIL is not available.)r   rQ   imagesrR   r   allarrayuint8rT   ImportErrorwarningswarn)rU   r   r%   r%   r&   test_load_sample_images   s   44r   c                  C   sJ   zt d} | jdksJ | jdksJ W d S  ty$   td Y d S w )Nz	china.jpgr   )i  i  r   r   )r   r   r   r   r   r   )chinar%   r%   r&   test_load_sample_image   s   r   c                  C   sn   t dd} | jjdksJ | jjsJ dt| jdksJ | js#J t  }tj	j
t| jd |jdd d	S )
zTest to check that we load a scaled version by default but that we can
    get an unscaled version when setting `scaled=False`.F)scaledr   r   r   r   gT5@g-C6?)atolN)r   rY   r   targetsizerQ   feature_namesrT   r   r   assert_allcloser   )diabetes_rawdiabetes_defaultr%   r%   r&   test_load_diabetes_raw   s   


r   zEloader_func, data_shape, target_shape, n_target, has_descr, filenames)r   r   )r   rX   Tr   )r{   r|   )r{   r   )r   r   )r   )   r   data_filenametarget_filenamer   )r   )r   @   )r   	   )n_class)Q  r   )r   c                    s   |   t  ts
J  jj|ksJ  jj|ksJ t dr*t j|d ks*J |d ur7t j|ks7J |r> j	s>J |rSd v sFJ t
 fdd|D sUJ d S d S )Nr   rW   data_modulec                    s.   g | ]}| v ot  d   |   qS )r   )r   rx   is_filerl   bunchr%   r&   rp   /  s
    ztest_loader.<locals>.<listcomp>)
isinstancer   rY   r   r   hasattrrQ   r   rS   rT   r   )loader_func
data_shapetarget_shapen_target	has_descrrR   r%   r   r&   test_loader  s&   


r   z%loader_func, data_dtype, target_dtypec                 C   s   |  }t || ||d d S )N)expected_data_dtypeexpected_target_dtyper   )r   
data_dtypetarget_dtypedefault_resultr%   r%   r&   test_toy_dataset_frame_dtype7  s   
r   c                  C   s2   t dd} tt| }d|_|d |jksJ d S )Nx)r   y)r   r   r   r   r   bunch_from_pklr%   r%   r&   test_loads_dumps_bunchL  s   
r   c                  C   sf   t dd} d| jd< tt| }|jdksJ |d dksJ d|_|jdks)J |d dks1J d S )Noriginal)keyzset from __dict__r   changed)r   __dict__r   r   r   r   r%   r%   r&   8test_bunch_pickle_generated_with_0_16_and_read_with_0_17S  s   

r   c                  C   s   t  } dt| v sJ d S )NrY   )r   rA   )rY   r%   r%   r&   test_bunch_dirh  s   r   c                  C   s   d} t jt| d ddlm} W d   n1 sw   Y  d} t jt| d ddlm} W d   dS 1 s:w   Y  dS )zLCheck that we raise the ethical warning when trying to import `load_boston`.z8The Boston housing prices dataset has an ethical problemmatchr   )load_bostonNzBcannot import name 'non_existing_function' from 'sklearn.datasets')non_existing_function)pytestraisesr   sklearn.datasetsr   r   )msgr   r   r%   r%   r&   test_load_boston_errorn  s   "r   c              	   C   s   d}t d|d}tt|dddt dd}| d| tjtd	d
G}tj	tdd
 t
|ddd W d   n1 s=w   Y  |jdksIJ |D ]}t|jd| ksYJ qKt|dksbJ W d   dS 1 smw   Y  dS )z'Check retry mechanism in _fetch_remote.z8https://scikit-learn.org/this_file_does_not_exist.tar.gzinvalid_fileN  	Not Found)urlcoder   hdrsfpside_effect"sklearn.datasets._base.urlretrievezRetry downloadingr   zHTTP Error 404r   r   )	n_retriesdelayr   zRetry downloading from url: )r   r   r	   ioBytesIOsetattrr   warnsUserWarningr   r   
call_countr7   messagerQ   )monkeypatchr   invalid_remote_fileurlretrieve_mockrecordrr%   r%   r&   1test_fetch_remote_raise_warnings_with_invalid_urlz  s"   "r  c                  C   s  t d\} }| dksJ |dksJ t d\} }| dksJ |dks$J t d\} }| dks0J |dks6J t d\} }| dksBJ |d	ksHJ t d
\} }| dksTJ |d	ksZJ t d\} }| dksfJ |dkslJ t d\} }| dksxJ |dks~J t d\} }| dksJ |dksJ t d\} }| dksJ |dksJ t d\} }| dksJ |dksJ t d\} }| dksJ |dksJ t d\} }| dksJ |d	ksJ tjtdd t d W d    d S 1 sw   Y  d S )Nzhttps://example.com/file.tar.gzexample.comzfile.tar.gzu2   https://example.com/نمونه نماینده.datau   نمونه-نماینده.dataz)https://example.com/path/to-/.file.tar.gzzexample.com/path_tozhttps://example.com/downloaded_filezhttps://example.comz2https://example.com/path/@to/data.json?param=valuez	data.jsonz4https://example.com/path/@@to._/-_.data.json.#anchorz"https://example.com//some_file.txtzsome_file.txtzhttp://example/../some_file.txtexamplez'https://example.com/!.'.,/some_file.txtz+https://example.com/a/!.'.,/b/some_file.txtzexample.com/a_bzhttps://example.com/!.'.,zInvalid URLr   z
https:/../)r   r   r   
ValueError)folderr   r%   r%   r&   (test_derive_folder_and_filename_from_url  sr   
"r  c                    s    fdd}t |dS )Nc                    sH   t  }t| jd}||  st| ddd d t|| | d S )N/r   r   )r   r
   r#   striprO   r	   r/   copy)r   
local_pathserver_root	file_pathserver_sider%   r&   _urlretrieve_mock  s
   z,_mock_urlretrieve.<locals>._urlretrieve_mockr   r   )r  r  r%   r  r&   _mock_urlretrieve  s   
r  c              	   C   s  t |}|d }|  |d }d}|j|dd |d }|  |d }|jddd |d	 }|  t|}| d
| | dt|d td}	|	|d d ksUJ |	jdd|ks_J td}	|	|d d d ksoJ |	jdd|dks|J t	d}
t
t$ t
j|
d tddd W d    n1 sw   Y  W d    n1 sw   Y  |d d }t| |d gksJ d S )Nr  
data.jsonl{"a": 1, "b": 2}
r]   r`   	subfolderzother_file.txtzSome important text data.r<   r   z$sklearn.datasets._base.get_data_home)return_valuehttps://example.com/data.jsonlr  z,https://example.com/subfolder/other_file.txtzERetry downloading from url: https://example.com/subfolder/invalid.txtr   z)https://example.com/subfolder/invalid.txtr   )r   )r   rt   
write_textr  r  r   r   	read_textreescaper   r   r	   r  sortediterdir)r  tmpdirr  	data_fileserver_dataserver_subfolderother_data_filer<   r
  fetched_file_pathexpected_warning_msglocal_subfolderr%   r%   r&   test_fetch_file_using_data_home  sV   

r2  c                 C   s  | d}t|d }d}|j|dd | d}t|}| d| td|d	}||d ks1J |jdd|ks;J |jd
ksBJ td|d	}||d ksPJ |jdd|ksZJ |jd
ksaJ |  td|d	}||d kssJ |jdd|ks}J |jdksJ d S )Nr  r  r  r]   r   client_sider   r#  r  rW   rX   )	rt   r   r$  r  r  r   r%  r  unlink)r  r*  r  r+  r,  r3  r
  r/  r%   r%   r&   test_fetch_file_without_sha256  s:   

r6  c              	   C   s  | d}t|d }d}|j|dd t|  }| d}t|}| d| t	d||d	}||d ks;J |j
dd|ksEJ |jd
ksLJ t	d||d	}||d ks[J |j
dd|kseJ |jd
kslJ |jddd d| d}	tj|	d( t	d||d	}||d ksJ |j
dd|ksJ |jdksJ W d    n1 sw   Y  t	d||d	}||d ksJ |j
dd|ksJ |jdksJ |  t	d||d	}||d ksJ |j
dd|ksJ |jdksJ t	d|d}||d ksJ |j
dd|ksJ |jdksJ d}
d}td| d|
 d}tjt|d/ tj|d t	d||
d	 W d    n1 sFw   Y  W d    d S W d    d S 1 s_w   Y  d S )Nr  r  r  r]   r   r3  r   r#  )r  sha256rW   zcorrupted contentszQSHA256 checksum of existing local file data.jsonl \(.*\) differs from expected \(z9\): re-downloading from https://example.com/data.jsonl \.r   rX   r   r4  deadbabecafebeefzdiffers from expectedz#The SHA256 checksum of data.jsonl (z) differs from expected (z).)rt   r   r$  hashlibr7  
read_bytes	hexdigestr  r  r   r%  r  r   r  r5  r&  r'  r   OSError)r  r*  r  r+  r,  expected_sha256r3  r
  r/  expected_msgnon_matching_sha256r0  expected_error_msgr%   r%   r&   test_fetch_file_with_sha256?  s   

	 $rA  )Vr9  r  r-   r&  r/   rD   r   	functoolsr   	importlibr   pathlibr   pickler   r   unittest.mockr   urllib.errorr	   urllib.parser
   numpyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   sklearn.datasets._baser   r   r   r   r   "sklearn.datasets.tests.test_commonr   sklearn.preprocessingr   sklearn.utilsr   r    r1   fixturer<   r?   rK   rM   markparametrizerP   rV   r[   rf   ri   rz   r   r   r   r   r   r   r   r   float64intr   r   r   r   r   r  r  r  r2  r6  rA  r%   r%   r%   r&   <module>   s    8





	







	






G4(