o
    `^h@X                     @   s  d Z ddlZddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ dd Zdd Zdd Zdd Zd1ddZddddZdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd2d%d&Z d3d'd(Z!d)d* Z"d+d, Z#d-d. Z$d/d0 Z%dS )4zBA collection of utilities to work with sparse matrices and arrays.    N)LinearOperator   )_sparse_min_max_sparse_nan_min_max)_check_sample_weight   )csc_mean_variance_axis0)csr_mean_variance_axis0)incr_mean_variance_axis0c                 C   s(   t | r| jnt| }d| }t|)z2Raises a TypeError if X is not a CSR or CSC matrixz,Expected a CSR or CSC sparse matrix, got %s.)spissparseformattype	TypeError)X
input_typeerr r   W/home/air/shanriGPT/back/venv/lib/python3.10/site-packages/sklearn/utils/sparsefuncs.py_raise_typeerror   s   r   c                 C   s   | dvr
t d|  d S )N)r   r   z8Unknown axis value: %d. Use 0 for rows, or 1 for columns)
ValueErroraxisr   r   r   _raise_error_wrong_axis   s
   r   c                 C   s6   |j d | j d ksJ |  j|j| jdd9  _dS )a  Inplace column scaling of a CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to normalize using the variance of the features.
        It should be of CSR format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed feature-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_csr_column_scale(csr, scale)
    >>> csr.todense()
    matrix([[16,  3,  4],
            [ 0,  0, 10],
            [ 0,  0,  0],
            [ 0,  0,  0]])
    r   r   clip)modeN)shapedatatakeindicesr   scaler   r   r   inplace_csr_column_scale%   s   %r"   c                 C   s:   |j d | j d ksJ |  jt|t| j9  _dS )a  Inplace row scaling of a CSR matrix.

    Scale each sample of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to be scaled. It should be of CSR format.

    scale : ndarray of float of shape (n_samples,)
        Array of precomputed sample-wise values to use for scaling.
    r   N)r   r   nprepeatdiffindptrr    r   r   r   inplace_csr_row_scaleN   s   "r'   Fc                 C   s   t | t| r!| jdkr!|dkrt| ||dS t| j||dS t| r>| jdkr>|dkr6t| ||dS t| j||dS t|  dS )a{  Compute mean and variance along an axis on a CSR or CSC matrix.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It can be of CSR or CSC format.

    axis : {0, 1}
        Axis along which the axis should be computed.

    weights : ndarray of shape (n_samples,) or (n_features,), default=None
        If axis is set to 0 shape is (n_samples,) or
        if axis is set to 1 shape is (n_features,).
        If it is set to None, then samples are equally weighted.

        .. versionadded:: 0.24

    return_sum_weights : bool, default=False
        If True, returns the sum of weights seen for each feature
        if `axis=0` or each sample if `axis=1`.

        .. versionadded:: 0.24

    Returns
    -------

    means : ndarray of shape (n_features,), dtype=floating
        Feature-wise means.

    variances : ndarray of shape (n_features,), dtype=floating
        Feature-wise variances.

    sum_weights : ndarray of shape (n_features,), dtype=floating
        Returned if `return_sum_weights` is `True`.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.mean_variance_axis(csr, axis=0)
    (array([2.  , 0.25, 1.75]), array([12.    ,  0.1875,  4.1875]))
    csrr   )weightsreturn_sum_weightscscN)r   r   r   r   _csr_mean_var_axis0_csc_mean_var_axis0Tr   )r   r   r)   r*   r   r   r   mean_variance_axis`   s$   6r/   )r)   c                C   s.  t | t| r| jdv st|  t|dkr#tj|j||j	d}t|t|  kr9t|ks>t
d t
d|dkr^t|| jd kr]t
d| jd  dt| dnt|| jd kryt
d	| jd  dt| d|dkr| jn| } |d
urt|| | j	d}t| ||||dS )a  Compute incremental mean and variance along an axis on a CSR or CSC matrix.

    last_mean, last_var are the statistics computed at the last step by this
    function. Both must be initialized to 0-arrays of the proper size, i.e.
    the number of features in X. last_n is the number of samples encountered
    until now.

    Parameters
    ----------
    X : CSR or CSC sparse matrix of shape (n_samples, n_features)
        Input data.

    axis : {0, 1}
        Axis along which the axis should be computed.

    last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Array of means to update with the new data X.
        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.

    last_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Array of variances to update with the new data X.
        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.

    last_n : float or ndarray of shape (n_features,) or (n_samples,),             dtype=floating
        Sum of the weights seen so far, excluding the current weights
        If not float, it should be of shape (n_features,) if
        axis=0 or (n_samples,) if axis=1. If float it corresponds to
        having same weights for all samples (or features).

    weights : ndarray of shape (n_samples,) or (n_features,), default=None
        If axis is set to 0 shape is (n_samples,) or
        if axis is set to 1 shape is (n_features,).
        If it is set to None, then samples are equally weighted.

        .. versionadded:: 0.24

    Returns
    -------
    means : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Updated feature-wise means if axis = 0 or
        sample-wise means if axis = 1.

    variances : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Updated feature-wise variances if axis = 0 or
        sample-wise variances if axis = 1.

    n : ndarray of shape (n_features,) or (n_samples,), dtype=integral
        Updated number of seen samples per feature if axis=0
        or number of seen features per sample if axis=1.

        If weights is not None, n is a sum of the weights of the seen
        samples or features instead of the actual number of seen
        samples or features.

    Notes
    -----
    NaNs are ignored in the algorithm.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.incr_mean_variance_axis(
    ...     csr, axis=0, last_mean=np.zeros(3), last_var=np.zeros(3), last_n=2
    ... )
    (array([1.3..., 0.1..., 1.1...]), array([8.8..., 0.1..., 3.4...]),
    array([6., 6., 6.]))
    )r+   r(   r   )dtypez8last_mean, last_var, last_n do not have the same shapes.r   zHIf axis=1, then last_mean, last_n, last_var should be of size n_samples z (Got z).zIIf axis=0, then last_mean, last_n, last_var should be of size n_features N)	last_meanlast_varlast_nr)   )r   r   r   r   r   r#   sizefullr   r0   r   r.   r   _incr_mean_var_axis0)r   r   r1   r2   r3   r)   r   r   r   incr_mean_variance_axis   sB   Q$
r7   c                 C   R   t | r| jdkrt| j| dS t | r#| jdkr#t| | dS t|  dS )a  Inplace column scaling of a CSC/CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to normalize using the variance of the features. It should be
        of CSC or CSR format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed feature-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 3, 4, 4, 4])
    >>> indices = np.array([0, 1, 2, 2])
    >>> data = np.array([8, 1, 2, 5])
    >>> scale = np.array([2, 3, 2])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_column_scale(csr, scale)
    >>> csr.todense()
    matrix([[16,  3,  4],
            [ 0,  0, 10],
            [ 0,  0,  0],
            [ 0,  0,  0]])
    r+   r(   N)r   r   r   r'   r.   r"   r   r    r   r   r   inplace_column_scale!  s
   %r9   c                 C   r8   )a  Inplace row scaling of a CSR or CSC matrix.

    Scale each row of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to be scaled. It should be of CSR or CSC format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed sample-wise values to use for scaling.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 4, 5])
    >>> indices = np.array([0, 1, 2, 3, 3])
    >>> data = np.array([8, 1, 2, 5, 6])
    >>> scale = np.array([2, 3, 4, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 1, 0, 0],
            [0, 0, 2, 0],
            [0, 0, 0, 5],
            [0, 0, 0, 6]])
    >>> sparsefuncs.inplace_row_scale(csr, scale)
    >>> csr.todense()
     matrix([[16,  2,  0,  0],
             [ 0,  0,  6,  0],
             [ 0,  0,  0, 20],
             [ 0,  0,  0, 30]])
    r+   r(   N)r   r   r   r"   r.   r'   r   r    r   r   r   inplace_row_scaleN  s
   $r:   c                 C   sv   ||fD ]}t |tjrtdq|dk r|| jd 7 }|dk r'|| jd 7 }| j|k}|| j| j|k< || j|< dS )aK  Swap two rows of a CSC matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of
        CSC format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
     m and n should be valid integersr   N)
isinstancer#   ndarrayr   r   r   )r   mntm_maskr   r   r   inplace_swap_row_cscz  s   
rB   c              	   C   sx  ||fD ]}t |tjrtdq|dk r|| jd 7 }|dk r'|| jd 7 }||kr0||}}| j}|| }||d  }|| }||d  }|| }	|| }
|	|
krr| j|d |  |
|	 7  < ||
 | j|d < ||	 | j|< t| jd| | j|| | j|| | j|| | j|d g| _t| jd| | j|| | j|| | j|| | j|d g| _dS )aK  Swap two rows of a CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of
        CSR format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    r;   r   r   r   N)	r<   r#   r=   r   r   r&   concatenater   r   )r   r>   r?   r@   r&   m_startm_stopn_startn_stopnz_mnz_nr   r   r   inplace_swap_row_csr  sJ   
	
rJ   c                 C   sT   t | r| jdkrt| || dS t | r$| jdkr$t| || dS t|  dS )a  
    Swap two rows of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of CSR or
        CSC format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 3, 3])
    >>> indices = np.array([0, 2, 2])
    >>> data = np.array([8, 2, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 0, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_swap_row(csr, 0, 1)
    >>> csr.todense()
    matrix([[0, 0, 5],
            [8, 0, 2],
            [0, 0, 0],
            [0, 0, 0]])
    r+   r(   N)r   r   r   rB   rJ   r   r   r>   r?   r   r   r   inplace_swap_row  s
   %rL   c                 C   s   |dk r|| j d 7 }|dk r|| j d 7 }t| r(| jdkr(t| || dS t| r:| jdkr:t| || dS t|  dS )a  
    Swap two columns of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two columns are to be swapped. It should be of
        CSR or CSC format.

    m : int
        Index of the column of X to be swapped.

    n : int
        Index of the column of X to be swapped.

    Examples
    --------
    >>> from sklearn.utils import sparsefuncs
    >>> from scipy import sparse
    >>> import numpy as np
    >>> indptr = np.array([0, 2, 3, 3, 3])
    >>> indices = np.array([0, 2, 2])
    >>> data = np.array([8, 2, 5])
    >>> csr = sparse.csr_matrix((data, indices, indptr))
    >>> csr.todense()
    matrix([[8, 0, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    >>> sparsefuncs.inplace_swap_column(csr, 0, 1)
    >>> csr.todense()
    matrix([[0, 8, 2],
            [0, 0, 5],
            [0, 0, 0],
            [0, 0, 0]])
    r   r   r+   r(   N)r   r   r   r   rJ   rB   r   rK   r   r   r   inplace_swap_column  s   %rM   c                 C   s<   t | r| jdv r|rt| |dS t| |dS t|  dS )a  Compute minimum and maximum along an axis on a CSR or CSC matrix.

     Optionally ignore NaN values.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It should be of CSR or CSC format.

    axis : {0, 1}
        Axis along which the axis should be computed.

    ignore_nan : bool, default=False
        Ignore or passing through NaN values.

        .. versionadded:: 0.20

    Returns
    -------

    mins : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Feature-wise minima.

    maxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Feature-wise maxima.
    )r(   r+   r   N)r   r   r   r   r   r   )r   r   
ignore_nanr   r   r   min_max_axis4  s
   rO   c                 C   s   |dkrd}n|dkrd}n| j dkrtd | j |du r0|du r&| jS tt| j|S |dkrGt| j}|du rC|dS || S |dkrp|du rZtj| j	| j
d d	S t|t| j}tj| j	| j
d |d
S td |)a  A variant of X.getnnz() with extension to weighting on axis 0.

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_labels)
        Input data. It should be of CSR format.

    axis : {0, 1}, default=None
        The axis on which the data is aggregated.

    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.

    Returns
    -------
    nnz : int, float, ndarray of shape (n_samples,) or ndarray of shape (n_features,)
        Number of non-zero values in the array along a given axis. Otherwise,
        the total number of non-zero values in the array is returned.
    r   r   r(   z#Expected CSR sparse format, got {0}Nintp)	minlength)rS   r)   zUnsupported axis: {0})r   r   nnzr#   dotr%   r&   astypebincountr   r   r$   r   )r   r   sample_weightoutr)   r   r   r   count_nonzeroX  s*   

rZ   c                 C   sp   t | | }|stjS t| dk }t|d\}}|   |r&t|| ||S t|d | ||t|| || d S )zCompute the median of data with n_zeros additional zeros.

    This function is used to support sparse matrices; it modifies data
    in-place.
    r   r   r   g       @)lenr#   nanrZ   divmodsort_get_elem_at_rank)r   n_zerosn_elems
n_negativemiddleis_oddr   r   r   _get_median  s   re   c                 C   s,   | |k r||  S | | |k rdS || |  S )z@Find the value in data augmented with n_zeros for the given rankr   r   )rankr   rb   r`   r   r   r   r_     s
   r_   c           
      C   s   t | r
| jdkstd| j | j}| j\}}t|}tt	|dd |dd D ]\}\}}t
| j|| }||j }	t||	||< q-|S )aC  Find the median across axis 0 of a CSC matrix.

    It is equivalent to doing np.median(X, axis=0).

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It should be of CSC format.

    Returns
    -------
    median : ndarray of shape (n_features,)
        Median.
    r+   z%Expected matrix of CSC format, got %sNrP   r   )r   r   r   r   r&   r   r#   zeros	enumeratezipcopyr   r4   re   )
r   r&   	n_samples
n_featuresmedianf_indstartendr   nzr   r   r   csc_median_axis_0  s   

*
rr   c                    sV   dddf  j t fdd fddfddfdd j jdS )aA  Create an implicitly offset linear operator.

    This is used by PCA on sparse data to avoid densifying the whole data
    matrix.

    Params
    ------
        X : sparse matrix of shape (n_samples, n_features)
        offset : ndarray of shape (n_features,)

    Returns
    -------
    centered : LinearOperator
    Nc                        |  |   S Nr   xr   offsetr   r   <lambda>      z)_implicit_column_offset.<locals>.<lambda>c                    rs   rt   r   ru   rw   r   r   ry     rz   c                    s    |  |     S rt   )sumru   XTrx   r   r   ry     s    c                    s&    |  j | jddd d d f   S )Nr   r   )r.   r{   ru   r|   r   r   ry     s   & )matvecmatmatrmatvecrmatmatr0   r   )r.   r   r0   r   rw   r   )r   r}   rx   r   _implicit_column_offset  s   r   )NF)F)NN)&__doc__numpyr#   scipy.sparsesparser   scipy.sparse.linalgr   utils.fixesr   r   utils.validationr   sparsefuncs_fastr   r-   r	   r,   r
   r6   r   r   r"   r'   r/   r7   r9   r:   rB   rJ   rL   rM   rO   rZ   re   r_   rr   r   r   r   r   r   <module>   s6    )
Ns-,?-
1
$6	