o
    ?Hhd                     @   s  d dl Z d dlZd dlZd dlZd dlZ	 g dZe dZe dZ	e dZ
e dZe dZe dZe d	Ze d
Ze dZe dZe dZG dd deZG dd deZG dd dZG dd deZG dd deZG dd deZG dd deZG dd deZdd Zd d! Zd"d# Zd6d$d%Zd&d' Z d(d) Z!d*d+ Z"d,d- Z#d.d/ Z$G d0d1 d1Z%d2d3 Z&d4d5 Z'dS )7    N)MetaDataloadarff	ArffErrorParseArffErrorz^\s*@z^%z^\s+$z^\s*@\S*z^@[Dd][Aa][Tt][Aa]z*^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)z2^\s*@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)z{(.+)}z%[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$z'(..+)'\s+(..+$)z(\S+)\s+(..+$)c                   @      e Zd ZdS )r   N__name__
__module____qualname__ r   r   W/home/air/sanwanet/gpt-api/venv/lib/python3.10/site-packages/scipy/io/arff/_arffread.pyr   6       r   c                   @   r   )r   Nr   r   r   r   r   r   :   r   r   c                   @   s4   e Zd ZdZdd Zedd Zdd Zdd	 ZdS )
	AttributeNc                 C   s   || _ d | _tj| _d S N)namerangenpobject_dtypeselfr   r   r   r   __init__E   s   zAttribute.__init__c                 C      dS )zj
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.
        Nr   clsr   attr_stringr   r   r   parse_attributeJ   s   zAttribute.parse_attributec                 C   r   )-
        Parse a value of this type.
        Nr   r   data_strr   r   r   
parse_dataR   s   zAttribute.parse_datac                 C   s   | j d | j S )r   ,)r   	type_namer   r   r   r   __str__X   s   zAttribute.__str__)	r   r	   r
   r"   r   classmethodr   r    r$   r   r   r   r   r   A   s    
r   c                       sH   e Zd ZdZ fddZedd Zedd Zdd	 Z	d
d Z
  ZS )NominalAttributenominalc                    s6   t  | || _|| _tjtdd |D f| _d S )Nc                 s       | ]}t |V  qd S r   )len.0ir   r   r   	<genexpr>g       z,NominalAttribute.__init__.<locals>.<genexpr>)superr   valuesr   r   bytes_maxr   )r   r   r0   	__class__r   r   r   c   s   zNominalAttribute.__init__c                 C   s0   t | }|rt|d\}}t|S td)a8  Given a string containing a nominal type, returns a tuple of the
        possible values.

        A nominal type is defined as something framed between braces ({}).

        Parameters
        ----------
        atrv : str
           Nominal type definition

        Returns
        -------
        poss_vals : tuple
           possible values

        Examples
        --------
        >>> from scipy.io.arff._arffread import NominalAttribute
        >>> NominalAttribute._get_nom_val("{floup, bouga, fl, ratata}")
        ('floup', 'bouga', 'fl', 'ratata')
           z(This does not look like a nominal string)	r_nominalmatchsplit_data_linegrouptuple
ValueError)atrvmattrs_r   r   r   _get_nom_vali   s
   
zNominalAttribute._get_nom_valc                 C   s$   |d dkr|  |}| ||S dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For nominal attributes, the attribute string would be like '{<attr_1>,
         <attr2>, <attr_3>}'.
        r   {N)r@   )r   r   r   r0   r   r   r   r      s   	

z NominalAttribute.parse_attributec                 C   s6   || j v r|S |dkr|S tt| dt| j  )r   ?z value not in )r0   r;   strr   r   r   r   r       s
   
zNominalAttribute.parse_datac                 C   sN   | j d }tt| jd D ]}|| j| d 7 }q|| jd 7 }|d7 }|S )Nz,{r5   r!   })r   r   r)   r0   r   msgr,   r   r   r   r$      s   
zNominalAttribute.__str__)r   r	   r
   r"   r   staticmethodr@   r%   r   r    r$   __classcell__r   r   r3   r   r&   _   s    

r&   c                       s8   e Zd Z fddZedd Zdd Zdd Z  ZS )	NumericAttributec                    s   t  | d| _tj| _d S )Nnumeric)r/   r   r"   r   float64r   r   r3   r   r   r      s   zNumericAttribute.__init__c                 C   sT   |   }|dtd dks$|dtd dks$|dtd dkr(| |S dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For numeric attributes, the attribute string would be like
        'numeric' or 'int' or 'real'.
        NrK   intreallowerstripr)   r   r   r   r   r      s   
z NumericAttribute.parse_attributec                 C   s   d|v rt jS t|S )a  
        Parse a value of this type.

        Parameters
        ----------
        data_str : str
           string to convert

        Returns
        -------
        f : float
           where float can be nan

        Examples
        --------
        >>> from scipy.io.arff._arffread import NumericAttribute
        >>> atr = NumericAttribute('atr')
        >>> atr.parse_data('1')
        1.0
        >>> atr.parse_data('1\n')
        1.0
        >>> atr.parse_data('?\n')
        nan
        rB   )r   nanfloatr   r   r   r   r       s   zNumericAttribute.parse_datac                 C   s<   |j d |j d  }t|t|t|t|| fS )Ng      ?r5   )sizer   nanminnanmaxmeanstd)r   datanbfacr   r   r   _basic_stats   s   zNumericAttribute._basic_stats)	r   r	   r
   r   r%   r   r    r[   rI   r   r   r3   r   rJ      s    
rJ   c                       s(   e Zd Z fddZedd Z  ZS )StringAttributec                    s   t  | d| _d S )Nstring)r/   r   r"   r   r3   r   r   r      s   
zStringAttribute.__init__c                 C   s,   |   }|dtd dkr| |S dS )z
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For string attributes, the attribute string would be like
        'string'.
        Nr]   rO   r   r   r   r   r         
zStringAttribute.parse_attribute)r   r	   r
   r   r%   r   rI   r   r   r3   r   r\      s    r\   c                       sH   e Zd Z fddZedd Zedd Zdd Z fd	d
Z	  Z
S )DateAttributec                    s8   t  | || _|| _d| _|| _td| j| _d S )Ndater   )	r/   r   date_formatdatetime_unitr"   r   r   
datetime64r   )r   r   ra   rb   r3   r   r   r     s   zDateAttribute.__init__c                 C   s   t | }|rz|d }d }d|v r|dd}d}n		 |dd}d}d|v r2|dd}d	}d
|v r>|d
d}d}d|v rJ|dd}d}d|v rV|dd}d}d|v rb|dd}d}d|v sjd|v rntd|d u rvtd||fS td)Nr5   yyyyz%YYyyz%yMMz%mMddz%dDHHz%Hhmmz%Mr=   ssz%SszZz6Date type attributes with time zone not supported, yetz"Invalid or unsupported date formatzInvalid or no date format)r_dater7   r9   rQ   replacer;   )r<   r=   patternrb   r   r   r   _get_date_format
  s>   
zDateAttribute._get_date_formatc                 C   s>   |   }|dtd dkr| |\}}| |||S dS )
        Parse the attribute line if it knows how. Returns the parsed
        attribute, or None.

        For date attributes, the attribute string would be like
        'date <format>'.
        Nr`   )rP   rQ   r)   ru   )r   r   r   attr_string_lowerra   rb   r   r   r   r   1  s
   
zDateAttribute.parse_attributec                 C   sT   |   d d}|dkrtd| jS tj|| j}t|d| j dS )r   '"rB   NaTzdatetime64[])rQ   r   rc   rb   datetimestrptimera   astype)r   r   date_strdtr   r   r   r    C  s   
zDateAttribute.parse_datac                    s   t   d | j S )Nr!   )r/   r$   ra   r#   r3   r   r   r$   O  s   zDateAttribute.__str__)r   r	   r
   r   rH   ru   r%   r   r    r$   rI   r   r   r3   r   r_      s    
&
r_   c                       s<   e Zd Z fddZedd Zdd Z fddZ  ZS )	RelationalAttributec                    s*   t  | d| _tj| _g | _d | _d S )N
relational)r/   r   r"   r   r   r   
attributesdialectr   r3   r   r   r   U  s
   
zRelationalAttribute.__init__c                 C   s,   |   }|dtd dkr| |S dS )rv   Nr   rO   )r   r   r   rw   r   r   r   r   \  r^   z#RelationalAttribute.parse_attributec                    s|   t ttj}| d}g }|dD ]}t|j\ _|	t
 fdd|D  qt|dd jD S )Nzunicode-escape
c                    s    g | ]}j |  | qS r   )r   r    r*   rowr   r   r   
<listcomp>y  s     z2RelationalAttribute.parse_data.<locals>.<listcomp>c                 S      g | ]}|j |jfqS r   r   r   r+   ar   r   r   r   |      )listr   r)   r   encodedecodesplitr8   r   appendr:   r   array)r   r   elemsescaped_string
row_tuplesrawr   r   r   r    m  s   zRelationalAttribute.parse_datac                    s$   t   d ddd | jD  S )Nz
	c                 s   r(   r   )rC   r   r   r   r   r-     r.   z.RelationalAttribute.__str__.<locals>.<genexpr>)r/   r$   joinr   r#   r3   r   r   r$   ~  s   zRelationalAttribute.__str__)	r   r	   r
   r   r%   r   r    r$   rI   r   r   r3   r   r   S  s    
r   c                 C   sB   t ttttf}|D ]}|| |}|d ur|  S q	td| )Nzunknown attribute )r&   rJ   r_   r\   r   r   r   )r   r   attr_classesr   attrr   r   r   to_attribute  s   r   c                  C   s8   t tdd} | du rt d}|jdkt_tj} | S )zL
    Checks if the bug https://bugs.python.org/issue30157 is unpatched.
    has_bugNz3, 'a'rx   )getattrcsv_sniffer_has_bug_last_fieldcsvSniffersniff	quotecharr   )r   r   r   r   r   r     s   r   c                 C   s4  t  rd}dd|dfD ]}t|tjtjB }|| }|r  nq||kr'dS |j}t|dks2J |d }|d d }	||	 }
|d	 d }	||	 }|d
 d }	t||	 }tdt	| d|
 dt	| d|
 dt	| d|
 dt	| dtj}t|
| }|
|_||v r||_||_||_dS dS )zT
    Workaround for the bug https://bugs.python.org/issue30157 if is unpatched.
    zG(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)zI(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)zG(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)z-(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)Nr5   r   quotedelimspacez((z)|^)\W*z[^z\n]*z\W*((z)|$))r   recompileDOTALL	MULTILINEfindall
groupindexr)   boolescapesearchr   	delimiterdoublequoteskipinitialspace)
sniff_liner   
delimitersright_regexrestrregexpmatchesr   r=   nr   r   r   	dq_regexpr   r   r   r   %workaround_csv_sniffer_bug_last_field  sB   
J
r   c                    s   d}t ttdjd   d dkr d d      }t fdd|D s0|d7 }|d u rDt  j	||d}t
|||d	 tt  g|}||fS )
Nz,	rD      r   c                 3   s    | ]}| v V  qd S r   r   )r+   dliner   r   r-     r.   z"split_data_line.<locals>.<genexpr>r!   )r   )r   r   r   )r   field_size_limitrM   ctypesc_ulongvaluerQ   anyr   r   r   nextreader)r   r   r   r   r   r   r   r   r8     s    r8   c                 C   s   |  }t|}|r4|d}t|r t|\}}t| }nt|r0t|\}}t| }nt	dt	d| t
||}| dkrLt| ||}||fS )a  Parse a raw string in header (e.g., starts by @attribute).

    Given a raw string attribute, try to get the name and type of the
    attribute. Constraints:

    * The first line must start with @attribute (case insensitive, and
      space like characters before @attribute are allowed)
    * Works also if the attribute is spread on multilines.
    * Works if empty lines or comments are in between

    Parameters
    ----------
    attribute : str
       the attribute string.

    Returns
    -------
    name : str
       name of the attribute
    value : str
       value of the attribute
    next : str
       next line to be parsed

    Examples
    --------
    If attribute is a string defined in python as r"floupi real", will
    return floupi as name, and real as value.

    >>> from scipy.io.arff._arffread import tokenize_attribute
    >>> iterable = iter([0] * 10) # dummy iterator
    >>> tokenize_attribute(iterable, r"@attribute floupi real")
    ('floupi', 'real', 0)

    If attribute is r"'floupi 2' real", will return 'floupi 2' as name,
    and real as value.

    >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ")
    ('floupi 2', 'real', 0)

    r5   zmulti line not supported yetzFirst line unparsable: r   )rQ   r_attributer7   r9   r_comattrvaltokenize_single_commar   r_wcomattrvaltokenize_single_wcommar;   r   rP   read_relational_attribute)iterable	attributesattrmattrr<   r   type	next_itemr   r   r   tokenize_attribute  s    *






r   c              
   C   f   t | }|r,z|d }|d }W ||fS  ty+ } ztd|d }~ww td|  Nr5   r   z Error while tokenizing attributezError while tokenizing single )r   r7   r9   rQ   
IndexErrorr;   valr=   r   r   er   r   r   r   <     

r   c              
   C   r   r   )r   r7   r9   rQ   r   r;   r   r   r   r   r   K  r   r   c                 C   s   t d|j d }||s;t|}|r2t|}|r+t| |\}}|j| nt	d| t
| }||rt
| }|S )z4Read the nested attributes of a relational attributez^@[Ee][Nn][Dd]\s*z\s*$Error parsing line )r   r   r   r7   r_headerliner   r   r   r   r;   r   )ofilerelational_attributer,   r_end_relationalr=   isattrr   r   r   r   r   Z  s"   



r   c                 C   s   t | }t|rt | }t|s	d}g }t|sXt|}|rOt|}|r6t| |\}}|| nt|}|rC|	d}nt
d| t | }nt | }t|r||fS )z&Read the header of the iterable ofile.Nr5   r   )r   	r_commentr7   
r_datametar   r   r   r   
r_relationr9   r;   )r   r,   relationr   r=   r   r   isrelr   r   r   read_headerp  s*   







r   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )r   a  Small container to keep useful information on a ARFF dataset.

    Knows about attributes names and types.

    Examples
    --------
    ::

        data, meta = loadarff('iris.arff')
        # This will print the attributes names of the iris.arff dataset
        for i in meta:
            print(i)
        # This works too
        meta.names()
        # Getting attribute type
        types = meta.types()

    Methods
    -------
    names
    types

    Notes
    -----
    Also maintains the list of attributes in order, i.e., doing for i in
    meta, where meta is an instance of MetaData, will return the
    different attribute names in the order they were defined.
    c                 C   s   || _ dd |D | _d S )Nc                 S   s   i | ]}|j |qS r   )r   r   r   r   r   
<dictcomp>  s    z%MetaData.__init__.<locals>.<dictcomp>)r   _attributes)r   relr   r   r   r   r     s   zMetaData.__init__c                 C   sp   d}|d| j  d7 }| jD ]'}|d| d| j| j 7 }| j| jr1|dt| j| j 7 }|d7 }q|S )N z	Dataset: r   	z's type is z, range is )r   r   r"   r   rC   rF   r   r   r   __repr__  s   

zMetaData.__repr__c                 C   
   t | jS r   )iterr   r#   r   r   r   __iter__  s   
zMetaData.__iter__c                 C   s   | j | }|j|jfS r   )r   r"   r   )r   keyr   r   r   r   __getitem__  s   
zMetaData.__getitem__c                 C   r   )zReturn the list of attribute names.

        Returns
        -------
        attrnames : list of str
            The attribute names.
        )r   r   r#   r   r   r   names  s   
zMetaData.namesc                    s    fdd j D }|S )zReturn the list of attribute types.

        Returns
        -------
        attr_types : list of str
            The attribute types.
        c                    s   g | ]} j | jqS r   )r   r"   )r+   r   r#   r   r   r     s    z"MetaData.types.<locals>.<listcomp>)r   )r   
attr_typesr   r#   r   types  s   
zMetaData.typesN)
r   r	   r
   __doc__r   r   r   r   r   r   r   r   r   r   r     s    

r   c                 C   sJ   t | dr| }nt| }zt|W || ur|  S S || ur$|  w w )a  
    Read an arff file.

    The data is returned as a record array, which can be accessed much like
    a dictionary of NumPy arrays. For example, if one of the attributes is
    called 'pressure', then its first 10 data points can be accessed from the
    ``data`` record array like so: ``data['pressure'][0:10]``


    Parameters
    ----------
    f : file-like or str
       File-like object to read from, or filename to open.

    Returns
    -------
    data : record array
       The data of the arff file, accessible by attribute names.
    meta : `MetaData`
       Contains information about the arff file such as name and
       type of attributes, the relation (name of the dataset), etc.

    Raises
    ------
    ParseArffError
        This is raised if the given file is not ARFF-formatted.
    NotImplementedError
        The ARFF file has an attribute which is not supported yet.

    Notes
    -----

    This function should be able to read most arff files. Not
    implemented functionality include:

    * date type attributes
    * string type attributes

    It can read files with numeric and nominal attributes. It cannot read
    files with sparse data ({} in the file). However, this function can
    read files with missing data (? in the file), representing the data
    points as NaNs.

    Examples
    --------
    >>> from scipy.io import arff
    >>> from io import StringIO
    >>> content = """
    ... @relation foo
    ... @attribute width  numeric
    ... @attribute height numeric
    ... @attribute color  {red,green,blue,yellow,black}
    ... @data
    ... 5.0,3.25,blue
    ... 4.5,3.75,green
    ... 3.0,4.00,red
    ... """
    >>> f = StringIO(content)
    >>> data, meta = arff.loadarff(f)
    >>> data
    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
    >>> meta
    Dataset: foo
    	width's type is numeric
    	height's type is numeric
    	color's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')

    read)hasattropen	_loadarffclose)fr   r   r   r   r     s   
F


r   c           	   
      s   zt | \} W n ty } zdt| }t||d }~ww d} D ]	}t|tr-d}q$t| }|r9tdt d
 fdd	}t	|| }t
|dd	  D }||fS )Nz'Error while parsing header, error was: FTz*String attributes not supported yet, sorryr!   c                 3   s^    t t}d }| D ]!}t|st|rqt||\ }t fdd|D V  qd S )Nc                    s   g | ]} |  | qS r   )r    r*   )r   r   r   r   r   c  s    z0_loadarff.<locals>.generator.<locals>.<listcomp>)r   r   r   r7   r_emptyr8   r:   )row_iterr   r   r   r   r   ni)r   r   	generatorI  s   z_loadarff.<locals>.generatorc                 S   r   r   r   r   r   r   r   r   g  r   z_loadarff.<locals>.<listcomp>)r!   )r   r;   rC   r   
isinstancer\   r   NotImplementedErrorr)   r   r   r   )	r   r   r   rG   hasstrr   metar   rY   r   r   r   r   *  s(   



r   r   )(r   r|   numpyr   r   r   __all__r   r_metar   r   r   r   r   r   r6   rr   r   r   OSErrorr   r   r   r&   rJ   r\   r_   r   r   r   r   r8   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sL   










K>S3
2#EJQ