o
    U|%i                     @   s   d Z ddlmZmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ d	Zd
eeejf deeejf fddZG dd dee	ZdS )zOutlier/anomaly detection utilities for MySQL Connector/Python.

Provides a scikit-learn compatible wrapper using HeatWave to score anomalies.
    )OptionalUnionN)OutlierMixin)MyBaseMLModel)ML_TASK)	copy_dict)MySQLConnectionAbstractgh㈵>probreturnc                 C   s$   t | tdt }t |d|  S )z
    Compute logit (logodds) for a probability, clipping to avoid numerical overflow.

    Args:
        prob: Scalar or array of probability values in (0,1).

    Returns:
        logit-transformed probabilities.
       )npclipEPSlog)r	   result r   P/home/air/sos_test/back/venv/lib/python3.10/site-packages/mysql/ai/ml/outlier.py_get_logits0   s   
r   c                
   @   s   e Zd ZdZ			ddedee dee dee fddZd	e	e
jejf d
ejfddZd	e	e
jejf d
ejfddZd	e	e
jejf d
ejfddZdS )MyAnomalyDetectora  
    MySQL HeatWave scikit-learn compatible anomaly/outlier detector.

    Flags samples as outliers when the probability of being an anomaly
    exceeds a user-tunable threshold.
    Includes helpers to obtain decision scores and anomaly probabilities
    for ranking.

    Args:
        db_connection (MySQLConnectionAbstract): Active MySQL DB connection.
        model_name (str, optional): Custom model name in the database.
        fit_extra_options (dict, optional): Extra options for fitting.
        score_extra_options (dict, optional): Extra options for scoring/prediction.

    Attributes:
        boundary: Decision threshold boundary in logit space. Derived from
            trained model's catalog info

    Methods:
        predict(X): Predict outlier/inlier labels.
        score_samples(X): Compute anomaly (normal class) logit scores.
        decision_function(X): Compute signed score above/below threshold for ranking.
    Ndb_connection
model_namefit_extra_optionsscore_extra_optionsc                 C   s*   t j| |tj||d t|| _d| _dS )a2  
        Initialize an anomaly detector instance with threshold and extra options.

        Args:
            db_connection: Active MySQL DB connection.
            model_name: Optional model name in DB.
            fit_extra_options: Optional extra fit options.
            score_extra_options: Optional extra scoring options.

        Raises:
            ValueError: If outlier_threshold is not in (0,1).
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.
        )r   r   N)r   __init__r   ANOMALY_DETECTIONr   r   boundary)selfr   r   r   r   r   r   r   r   W   s   

zMyAnomalyDetector.__init__Xr
   c                 C   s   t | |dk ddS )a  
        Predict outlier/inlier binary labels for input samples.

        Args:
            X: Samples to predict on.

        Returns:
            ndarray: Values are -1 for outliers, +1 for inliers, as per scikit-learn convention.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
        g        r   )r   wheredecision_function)r   r   r   r   r   predictw   s   zMyAnomalyDetector.predictc                 C   sh   |  |}| jdu r/|  }|du rtd|d d dd}|du r(tdtd| | _|| j S )a  
        Compute signed distance to the outlier threshold.

        Args:
            X: Samples to predict on.

        Returns:
            ndarray: Score > 0 means inlier, < 0 means outlier; |value| gives margin.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
            ValueError:
                If the provided model info does not provide threshold
        Nz Model does not exist in catalog.model_metadatatraining_paramsanomaly_detection_thresholdzzTrained model is outdated and does not support threshold. Try retraining or using an existing, trained model with MyModel.g      ?)score_samplesr   get_model_info
ValueErrorgetr   )r   r   sample_scores
model_info	thresholdr   r   r   r       s   


z#MyAnomalyDetector.decision_functionc                 C   s,   | j j|| jd}t|d dd  S )aJ  
        Compute normal probability logit score for each sample.
        Used for ranking, thresholding.

        Args:
            X: Samples to score.

        Returns:
            ndarray: Logit scores based on "normal" class probability.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
        )options
ml_resultsc                 S   s   | d d S )Nprobabilitiesnormalr   )xr   r   r   <lambda>   s    z1MyAnomalyDetector.score_samples.<locals>.<lambda>)_modelr!   r   r   applyto_numpy)r   r   r   r   r   r   r%      s   
zMyAnomalyDetector.score_samples)NNN)__name__
__module____qualname____doc__r   r   strdictr   r   pd	DataFramer   ndarrayr!   r    r%   r   r   r   r   r   >   s:    
 

-r   )r8   typingr   r   numpyr   pandasr;   sklearn.baser   mysql.ai.ml.baser   mysql.ai.ml.modelr   mysql.ai.utilsr   mysql.connector.abstractsr   r   floatr=   r   r   r   r   r   r   <module>   s   &