o
    U|%iE                     @   s  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZ ddlZddlZddlmZ ddlmZ ddlmZ ddlmZ d	Zd
ZddddddZd	Z dee! de!fddZ"ededee#e$e%e%f   fddZ&	dCdede%deddfddZ'de%fddZ(dDd e
e%ge)f d!e*de%fd"d#Z+d$e	dee%ee	 f fd%d&Z,dedej-fd'd(Z.ded)e%d*e%dej-fd+d,Z/ded)e%d*e%de)fd-d.Z0ded)e%d*e%ddfd/d0Z1	dCded)e%d*e%d1ej-d2ee%e%f ddfd3d4Z2ded)e%d1ej-dee%e%f fd5d6Z3d7e%de%fd8d9Z4dede%fd:d;Z5ded)e%d*e%de)fd<d=Z6	>dEd?eeej-ej7ej8f  d@e%deej- fdAdBZ9dS )Fa.  General utilities for AI features in MySQL Connector/Python.

Includes helpers for:
- defensive dict copying
- temporary table lifecycle management
- SQL execution and result conversions
- DataFrame to/from SQL table utilities
- schema/table/column name validation
- array-like to DataFrame conversion
    N)contextmanager)AnyCallableDictIteratorListOptionalTupleUnion)atomic_transaction)MySQLConnectionAbstract)MySQLCursorAbstract)ParamsSequenceOrDictTypemysql_ai    BIGINTDOUBLELONGTEXTBOOLEANDATETIME)int64float64objectboolzdatetime64[ns]optionsreturnc                 C   s   | du ri S t | S )z
    Make a defensive copy of a dictionary, or return an empty dict if None.

    Args:
        options: param dict or None

    Returns:
        dict
    N)copydeepcopy)r    r   Q/home/air/sos_test/back/venv/lib/python3.10/site-packages/mysql/ai/utils/utils.py	copy_dictI   s   

r    db_connectionc                 c   s    g }z(|V  W t | }|D ]
\}}t||| qW d   dS 1 s%w   Y  dS t | }|D ]
\}}t||| q3W d   w 1 sHw   Y  w )a  
    Context manager to track and automatically clean up temporary SQL tables.

    Args:
        db_connection: Database connection object used to create and delete tables.

    Returns:
        None

    Raises:
        DatabaseError:
            If a database connection issue occurs.
            If an operational error occurs during execution.

    Yields:
        temporary_tables: List of (schema_name, table_name) tuples created during the
            context. All tables in this list are deleted on context exit.
    N)r   delete_sql_table)r!   temporary_tablescursorschema_name
table_namer   r   r   temporary_sql_tablesY   s   
,r'   r$   queryparamsc                 C   s   |  ||pd dS )aB  
    Execute an SQL query with optional parameters using the given cursor.

    Args:
        cursor: MySQLCursorAbstract object to execute the query.
        query: SQL query string to execute.
        params: Optional sequence or dict providing parameters for the query.

    Raises:
        DatabaseError:
            If the provided SQL query/params are invalid
            If the query is valid but the sql raises as an exception
            If a database connection issue occurs.
            If an operational error occurs during execution.

    Returns:
        None
    r   N)execute)r$   r(   r)   r   r   r   execute_sqlx   s   r+   c                  C   s   t j} dtj| tdS )z
    Generate a random uppercase string of fixed length for table names.

    Returns:
        Random string of length RANDOM_TABLE_NAME_LENGTH.
     )k)stringascii_uppercasejoinrandomchoicesRANDOM_TABLE_NAME_LENGTH)char_setr   r   r   	_get_name   s   r5   d   	condition	max_callsc                 C   s,   t |D ]}| t  }r|  S qtd)a  
    Generate a random string name that satisfies a given condition.

    Args:
        condition: Callable that takes a generated name and returns True if it is valid.
        max_calls: Maximum number of attempts before giving up (default 100).

    Returns:
        A random string that fulfills the provided condition.

    Raises:
        RuntimeError: If the maximum number of attempts is reached without success.
    z<Reached max tries without successfully finding a unique name)ranger5   RuntimeError)r7   r8   _namer   r   r   get_random_name   s
   r=   valuec                 C   s>   t | ttfrt| dkrddgfS dt| gfS d| gfS )a.  
    Convert a Python value into its SQL-compatible string representation and parameters.

    Args:
        value: The value to format.

    Returns:
        Tuple containing:
            - A string for substitution into a SQL query.
            - A list of parameters to be bound into the query.
    r   z%sNzCAST(%s as JSON))
isinstancedictlistlenjsondumps)r>   r   r   r   format_value_sql   s
   

rE   c                 C   s   dt t dt t fdd}dtdtfdd}i }t| jD ]\}}|d dkr,|||< q|||< q|  }g }|D ]}t|}	t|D ]\}}
|| |
|	|< qC||	 q9t	j
|| jd	S )
a  
    Convert the results of a cursor's last executed query to a pandas DataFrame.

    Args:
        cursor: MySQLCursorAbstract with a completed query.

    Returns:
        DataFrame with data from the cursor.

    Raises:
        DatabaseError:
            If a database connection issue occurs.
            If an operational error occurs during execution.
            If a compatible SELECT query wasn't the last statement ran
    elemr   c                 S   s   | d ur	t | S d S N)rC   loadsrF   r   r   r   _json_processor   s   z+sql_response_to_df.<locals>._json_processorc                 S   s   | S rG   r   rI   r   r   r   _default_processor   s   z.sql_response_to_df.<locals>._default_processor      )columns)r   strr@   r   	enumeratedescriptionfetchallrA   appendpd	DataFramecolumn_names)r$   rJ   rK   idx_to_processoridxcolrowsprocessed_rowsrowprocessed_rowrF   r   r   r   sql_response_to_df   s   

r^   r%   r&   c                 C   s.   t | t | t| d| d|  t| S )aD  
    Load the entire contents of a SQL table into a pandas DataFrame.

    Args:
        cursor: MySQLCursorAbstract to execute the query.
        schema_name: Name of the schema containing the table.
        table_name: Name of the table to fetch.

    Returns:
        DataFrame containing all rows from the specified table.

    Raises:
        DatabaseError:
            If the table does not exist
            If a database connection issue occurs.
            If an operational error occurs during execution.
        ValueError: If the schema or table name is not valid
    zSELECT * FROM .)validate_namer+   r^   r$   r%   r&   r   r   r   sql_table_to_df   s   rb   c                 C   s,   t | t | | d||f |  duS )a  
    Check whether a table exists in a specific schema.

    Args:
        cursor: MySQLCursorAbstract object to execute the query.
        schema_name: Name of the database schema.
        table_name: Name of the table.

    Returns:
        True if the table exists, False otherwise.

    Raises:
        DatabaseError:
            If a database connection issue occurs.
            If an operational error occurs during execution.
        ValueError: If the schema or table name is not valid
    z
        SELECT 1
        FROM information_schema.tables
        WHERE table_schema = %s AND table_name = %s
        LIMIT 1
        Nr`   r*   fetchonera   r   r   r   table_exists  s   	re   c                 C   s*   t | t | t| d| d|  dS )a  
    Drop a table from the SQL database if it exists.

    Args:
        cursor: MySQLCursorAbstract to execute the drop command.
        schema_name: Name of the schema.
        table_name: Name of the table to delete.

    Returns:
        None

    Raises:
        DatabaseError:
            If a database connection issue occurs.
            If an operational error occurs during execution.
        ValueError: If the schema or table name is not valid
    zDROP TABLE IF EXISTS r_   N)r`   r+   ra   r   r   r   r"   6  s   r"   dfcol_name_to_placeholder_stringc                 C   s  |du ri }t | t | |jD ]}t t| q| d| }|jD ]_}g g }}	t||jD ].\}
}t|
dr>|
 n|
}
||v rO|| t|
g}}nt|
\}}|| |		| q1d
dd |jD }d
|}d| d| d	| d
}t| ||	d q$dS )a  
    Insert all rows from a pandas DataFrame into an existing SQL table.

    Args:
        cursor: MySQLCursorAbstract for execution.
        schema_name: Name of the database schema.
        table_name: Table to insert new rows into.
        df: DataFrame containing the rows to insert.
        col_name_to_placeholder_string:
            Optional mapping of column names to custom SQL value/placeholder
            strings.

    Returns:
        None

    Raises:
        DatabaseError:
            If the rows could not be inserted into the table, e.g., a type or shape issue
            If a database connection issue occurs.
            If an operational error occurs during execution.
        ValueError: If the schema or table name is not valid
    Nr_   item, c                 S   s   g | ]}t |qS r   )rO   .0rY   r   r   r   
<listcomp>  s    z$extend_sql_table.<locals>.<listcomp>zINSERT INTO  (z
) VALUES ())r)   )r`   rN   rO   valuesziphasattrrh   rE   rS   extendr0   r+   )r$   r%   r&   rf   rg   rY   qualified_table_namer\   placeholdersr)   rF   elem_placeholderelem_paramscols_sqlplaceholders_sql
insert_sqlr   r   r   extend_sql_tableP  s8   





rz   c                    s  t  fdd} d| }t t| |jD ]}tt| qg }|j D ]\}}tt|d}tt| || d|  q+d	|}	t
dd |jD }
|
r]|	d	7 }	d
| d|	 d}t | zt || W ||fS  ty   t |  w )a  
    Create a new SQL table with a random name, and populate it with data from a DataFrame.

    If an 'id' column is defined in the dataframe, it will be used as the primary key.

    Args:
        cursor: MySQLCursorAbstract for executing SQL.
        schema_name: Schema in which to create the table.
        df: DataFrame containing the data to be inserted.

    Returns:
        Tuple (qualified_table_name, table_name): The schema-qualified and
        unqualified table names.

    Raises:
        RuntimeError: If a random available table name could not be found.
        ValueError: If any schema, table, or a column name is invalid.
        DatabaseError:
            If a database connection issue occurs.
            If an operational error occurs during execution.
    c                    s   t  |  S rG   )re   )r&   r$   r%   r   r   <lambda>  s    z#sql_table_from_df.<locals>.<lambda>r_   r    ri   c                 s   s    | ]	}|  d kV  qdS )idN)lowerrj   r   r   r   	<genexpr>  s    z$sql_table_from_df.<locals>.<genexpr>z, PRIMARY KEY (id)zCREATE TABLE rm   rn   )r=   r`   rN   rO   dtypesitemsPD_TO_SQL_DTYPE_MAPPINGgetrS   r0   anyr+   rz   	Exceptionr"   )r$   r%   rf   r&   rs   rY   columns_sqldtypesql_typecolumns_str
has_id_colcreate_table_sqlr   r{   r   sql_table_from_df  s4   


r   r<   c                 C   s(   t | trtd| std|  | S )a  
    Validate that the string is a legal SQL identifier (letters, digits, underscores).

    Args:
        name: Name (schema, table, or column) to validate.

    Returns:
        The validated name.

    Raises:
        ValueError: If the name does not meet format requirements.
    z^[A-Za-z0-9_]+$zUnsupported name format )r?   rO   rematch
ValueError)r<   r   r   r   r`     s   r`   c                 C   sZ   | j }|du r't}t| }d| }t|| W d   n1 s"w   Y  t| |S )a  
    Retrieve the name of the currently selected schema, or set and ensure the default schema.

    Args:
        db_connection: MySQL connector database connection object.

    Returns:
        Name of the schema (database in use).

    Raises:
        ValueError: If the schema name is not valid
        DatabaseError:
            If a database connection issue occurs.
            If an operational error occurs during execution.
    NzCREATE DATABASE IF NOT EXISTS )databaseDEFAULT_SCHEMAr   r+   r`   )r!   schemar$   create_database_stmtr   r   r   source_schema  s   

r   c                 C   s4   t | t | | d| d| d |  du S )a+  
    Determine if a given SQL table is empty.

    Args:
        cursor: MySQLCursorAbstract with access to the database.
        schema_name: Name of the schema containing the table.
        table_name: Name of the table to check.

    Returns:
        True if the table has no rows, False otherwise.

    Raises:
        DatabaseError:
            If the table does not exist
            If a database connection issue occurs.
            If an operational error occurs during execution.
        ValueError: If the schema or table name is not valid
    zSELECT 1 FROM r_   z LIMIT 1Nrc   ra   r   r   r   is_table_empty  s   r   featurearr
col_prefixc                    sx   | du rdS t | tjrt| S t | tjr|  S | jdkr&| dd}  fddt| jd D }tj| |ddS )a$  
    Convert input data to a pandas DataFrame if necessary.

    Args:
        arr: Input data as a pandas DataFrame, NumPy ndarray, pandas Series, or None.

    Returns:
        If the input is None, returns None.
        Otherwise, returns a DataFrame backed by the same underlying data whenever
        possible (except in cases where pandas or NumPy must copy, such as for
        certain views or non-contiguous arrays).

    Notes:
        - If an ndarray is passed, column names will be integer indices (0, 1, ...).
        - If a DataFrame is passed, column names and indices are preserved.
        - The returned DataFrame is a shallow copy and shares data with the original
          input when possible; however, copies may still occur for certain input
          types or memory layouts.
    NrL   c                    s   g | ]	}  d | qS )r;   r   )rk   rX   r   r   r   rl   ;  s    z!convert_to_df.<locals>.<listcomp>F)rN   r   )	r?   rT   rU   Seriesto_framendimreshaper9   shape)r   r   	col_namesr   r   r   convert_to_df  s   

r   rG   )r6   )r   ):__doc__r   rC   r1   r   r.   
contextlibr   typingr   r   r   r   r   r   r	   r
   numpynppandasrT   mysql.ai.utils.atomic_cursorr   mysql.connector.abstractsr   mysql.connector.cursorr   mysql.connector.typesr   VAR_NAME_SPACEr3   r   r   r@   r    rA   tuplerO   r'   r+   r5   r   intr=   rE   rU   r^   rb   re   r"   rz   r   r`   r   r   r   ndarrayr   r   r   r   r   <module>   s   (
".

#


@

=
