o
    `^h$                  
   @   s0  d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
 ddlmZ ddlZddlZddlmZ dd	lmZmZ d
dlmZ d
dlmZmZmZ eddddZeddddZdZeeZ dej!fddZ"dd Z#dd Z$ee%edgdgeed
dddgeeddd dgd!d"d#dd"d$d%d!d&d'Z&dS )(a  
=============================
Species distribution dataset
=============================

This dataset represents the geographic distribution of species.
The dataset is provided by Phillips et. al. (2006).

The two species are:

 - `"Bradypus variegatus"
   <http://www.iucnredlist.org/details/3038/0>`_ ,
   the Brown-throated Sloth.

 - `"Microryzomys minutus"
   <http://www.iucnredlist.org/details/13408/0>`_ ,
   also known as the Forest Small Rice Rat, a rodent that lives in Peru,
   Colombia, Ecuador, Peru, and Venezuela.

References
----------

`"Maximum entropy modeling of species geographic distributions"
<http://rob.schapire.net/papers/ecolmod.pdf>`_ S. J. Phillips,
R. P. Anderson, R. E. Schapire - Ecological Modelling, 190:231-259, 2006.
    N)BytesIO)IntegralReal)PathLikemakedirsremove)exists   )Bunch)Intervalvalidate_params   )get_data_home)RemoteFileMetadata_fetch_remote_pkl_filepathzsamples.zipz.https://ndownloader.figshare.com/files/5976075@abb07ad284ac50d9e6d20f1c4211e0fd3c098f7f85955e89d321ee8efe37ac28)filenameurlchecksumzcoverages.zipz.https://ndownloader.figshare.com/files/5976078@4d862674d72e79d6cee77e63b98651ec7926043ba7d39dcb31329cf3f6073807zspecies_coverage.pkz   c                    sb    fddt |D }dd tfdd|D }tj |d}t|d }|dkr/d||< |S )	zjLoad a coverage file from an open file object.

    This will return a numpy array of the given dtype
    c                    s   g | ]}   qS  )readline).0_)Fr   e/home/air/shanriGPT/back/venv/lib/python3.10/site-packages/sklearn/datasets/_species_distributions.py
<listcomp>H       z"_load_coverage.<locals>.<listcomp>c                 S   s   |   d t|   d fS )Nr   r   )splitfloat)tr   r   r   <lambda>I   s    z _load_coverage.<locals>.<lambda>c                    s   g | ]} |qS r   r   )r   line)
make_tupler   r   r   J   r   dtypes   NODATA_valuei)rangedictnploadtxtint)r   header_lengthr'   headerMnodatar   )r   r%   r   _load_coverageC   s   r1   c                 C   s6   |   d d}tj| dddd}||j_|S )zLoad csv file.

    Parameters
    ----------
    F : file object
        CSV file open in byte mode.

    Returns
    -------
    rec : np.ndarray
        record array representing the data
    ascii,r   z	S22,f4,f4)skiprows	delimiterr'   )r   decodestripr    r*   r+   r'   names)r   r8   recr   r   r   	_load_csvS   s   r:   c                 C   s`   | j | j }|| j| j  }| j| j }|| j| j  }t||| j}t||| j}||fS )a%  Construct the map grid from the batch object

    Parameters
    ----------
    batch : Batch object
        The object returned by :func:`fetch_species_distributions`

    Returns
    -------
    (xgrid, ygrid) : 1-D arrays
        The grid corresponding to the values in batch.coverages
    )x_left_lower_corner	grid_sizeNxy_left_lower_cornerNyr*   arange)batchxminxmaxyminymaxxgridygridr   r   r   construct_gridsg   s   rH   booleanleft)closedg        neither)	data_homedownload_if_missing	n_retriesdelayT)prefer_skip_nested_validation   g      ?c                 C   s  t | } t| st|  tdddddd}tj}t| t}t|s|s'tdt	
dtj| f  tt| ||d	}t|$}|jD ]}	t||	 }
d
|	v rRt|
}d|	v rZt|
}qBW d   n1 sew   Y  t| t	
dtj| f  tt| ||d	}t|,}g }|jD ]}	t||	 }
t	d|	 |t|
 qtj||d}W d   n1 sw   Y  t| td|||d|}tj||dd |S t|}|S )a  Loader for species distribution dataset from Phillips et. al. (2006).

    Read more in the :ref:`User Guide <species_distribution_dataset>`.

    Parameters
    ----------
    data_home : str or path-like, default=None
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    n_retries : int, default=3
        Number of retries when HTTP errors are encountered.

        .. versionadded:: 1.5

    delay : float, default=1.0
        Number of seconds between retries.

        .. versionadded:: 1.5

    Returns
    -------
    data : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        coverages : array, shape = [14, 1592, 1212]
            These represent the 14 features measured
            at each point of the map grid.
            The latitude/longitude values for the grid are discussed below.
            Missing data is represented by the value -9999.
        train : record array, shape = (1624,)
            The training points for the data.  Each point has three fields:

            - train['species'] is the species name
            - train['dd long'] is the longitude, in degrees
            - train['dd lat'] is the latitude, in degrees
        test : record array, shape = (620,)
            The test points for the data.  Same format as the training data.
        Nx, Ny : integers
            The number of longitudes (x) and latitudes (y) in the grid
        x_left_lower_corner, y_left_lower_corner : floats
            The (x,y) position of the lower-left corner, in degrees
        grid_size : float
            The spacing between points of the grid, in degrees

    Notes
    -----

    This dataset represents the geographic distribution of species.
    The dataset is provided by Phillips et. al. (2006).

    The two species are:

    - `"Bradypus variegatus"
      <http://www.iucnredlist.org/details/3038/0>`_ ,
      the Brown-throated Sloth.

    - `"Microryzomys minutus"
      <http://www.iucnredlist.org/details/13408/0>`_ ,
      also known as the Forest Small Rice Rat, a rodent that lives in Peru,
      Colombia, Ecuador, Peru, and Venezuela.

    References
    ----------

    * `"Maximum entropy modeling of species geographic distributions"
      <http://rob.schapire.net/papers/ecolmod.pdf>`_
      S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,
      190:231-259, 2006.

    Examples
    --------
    >>> from sklearn.datasets import fetch_species_distributions
    >>> species = fetch_species_distributions()
    >>> species.train[:5]
    array([(b'microryzomys_minutus', -64.7   , -17.85  ),
           (b'microryzomys_minutus', -67.8333, -16.3333),
           (b'microryzomys_minutus', -67.8833, -16.3   ),
           (b'microryzomys_minutus', -67.8   , -16.2667),
           (b'microryzomys_minutus', -67.9833, -15.9   )],
          dtype=[('species', 'S22'), ('dd long', '<f4'), ('dd lat', '<f4')])

    For a more extended example,
    see :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`
    g33333Wi  gfffffLi8  g?)r;   r=   r>   r?   r<   z1Data not found and `download_if_missing` is Falsez&Downloading species data from %s to %s)dirnamerO   rP   traintestNz'Downloading coverage data from %s to %sz - converting {}r&   )	coveragesrU   rT   	   )compressr   )r   r   r   r)   r*   int16r   DATA_ARCHIVE_NAMEOSErrorloggerinfoSAMPLESr   r   loadfilesr   r:   r   	COVERAGESdebugformatappendr1   asarrayr
   joblibdump)rM   rN   rO   rP   extra_paramsr'   archive_pathsamples_pathXffhandlerT   rU   coverages_pathrV   bunchr   r   r   fetch_species_distributions   sd   i



rp   )'__doc__loggingior   numbersr   r   osr   r   r   os.pathr   rf   numpyr*   utilsr
   utils._param_validationr   r    r   _baser   r   r   r^   ra   rZ   	getLogger__name__r\   rY   r1   r:   rH   strrp   r   r   r   r   <module>   sP    
