o
    ;Di7                     @   s8  d dl mZmZmZ d dlmZ d dlmZmZ d dl	m
Z
mZmZmZ dd Zee_d$dd	Zee_d
d Zee_dd Zee_G dd deZdefdefffddZee_defdefffddZee_G dd deZdd Zee_dd Zee_G dd deZdd Zee_d d! Zee_d"d# Zee_dS )%    )absolute_importprint_functiondivision)Counter)string_types	maketrans)valuesTabledatawrapc                 C   s   t dd t| D S )z
    Count the number of data rows in a table. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'], ['a', 1], ['b', 2]]
        >>> etl.nrows(table)
        2

    c                 s   s    | ]}d V  qdS    N ).0_r   r   K/var/www/Datamplify/venv/lib/python3.10/site-packages/petl/util/counting.py	<genexpr>   s    znrows.<locals>.<genexpr>)sumr
   )tabler   r   r   nrows   s   r   Nc                 C   sD   d}d}t | ||dD ]}|d7 }||kr|d7 }q|t|| fS )a  
    Count the number of occurrences of `value` under the given field. Returns
    the absolute count and relative frequency as a pair. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['a', 1],
        ...          ['b', 2],
        ...          ['b', 7]]
        >>> etl.valuecount(table, 'foo', 'b')
        (2, 0.6666666666666666)

    The `field` argument can be a single field name or index (starting from
    zero) or a tuple of field names and/or indexes.

    r   missingr   )r   float)r   fieldvaluer   totalvsvr   r   r   
valuecount   s   r   c              	   O   sP   | dd}t }t| ||dD ]}z
||  d7  < W q ty%   Y qw |S )a3  
    Find distinct values for the given field and count the number of
    occurrences. Returns a :class:`dict` mapping values to counts. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['a', True],
        ...          ['b'],
        ...          ['b', True],
        ...          ['c', False]]
        >>> etl.valuecounter(table, 'foo')
        Counter({'b': 2, 'a': 1, 'c': 1})

    The `field` argument can be a single field name or index (starting from
    zero) or a tuple of field names and/or indexes.

    r   Nr   r   )getr   r   
IndexError)r   r   kwargsr   counterr   r   r   r   valuecounter:   s   r#   c                 O   s   t | |fi |S )a  
    Find distinct values for the given field and count the number and relative
    frequency of occurrences. Returns a table mapping values to counts, with
    most common values first. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['a', True, 0.12],
        ...          ['a', True, 0.17],
        ...          ['b', False, 0.34],
        ...          ['b', False, 0.44],
        ...          ['b']]
        >>> etl.valuecounts(table, 'foo')
        +-----+-------+-----------+
        | foo | count | frequency |
        +=====+=======+===========+
        | 'b' |     3 |       0.6 |
        +-----+-------+-----------+
        | 'a' |     2 |       0.4 |
        +-----+-------+-----------+

        >>> etl.valuecounts(table, 'foo', 'bar')
        +-----+-------+-------+-----------+
        | foo | bar   | count | frequency |
        +=====+=======+=======+===========+
        | 'a' | True  |     2 |       0.4 |
        +-----+-------+-------+-----------+
        | 'b' | False |     2 |       0.4 |
        +-----+-------+-------+-----------+
        | 'b' | None  |     1 |       0.2 |
        +-----+-------+-------+-----------+

    If rows are short, the value of the keyword argument `missing` is counted.

    Multiple fields can be given as positional arguments. If multiple fields are
    given, these are treated as a compound key.

    )ValueCountsView)r   r   r!   r   r   r   valuecountsZ   s   (r%   c                   @   s   e Zd ZdddZdd ZdS )r$   Nc                 C   s   || _ || _|| _d S N)r   r   r   )selfr   r   r   r   r   r   __init__   s   
zValueCountsView.__init__c                 c   s    t | jttfrt| jd }n| jddf}|V  t| jg| jR d| ji}| }tdd |D }t	| jdkrV|D ]}t|d |d t
|d | f V  q>d S |D ]}|d |d t
|d | fV  qXd S )	N)count	frequencyr)   r*   r   c                 s       | ]}|d  V  qdS r   r   r   cr   r   r   r          z+ValueCountsView.__iter__.<locals>.<genexpr>r   r   )
isinstancer   tuplelistr#   r   r   most_commonr   lenr   )r'   outhdrr"   countsr   r-   r   r   r   __iter__   s   ("zValueCountsView.__iter__r&   __name__
__module____qualname__r(   r6   r   r   r   r   r$      s    
r$   intr   c           	   	   C   s   t |ttfrt|}t t }}| D ]
}d||< d||< qt| |D ],}t |trR| D ] \}}z|| W n   ||  d7  < Y q1||  d7  < q1q&||fS )ah  
    Count the number of `str` or `unicode` values under the given fields that
    can be parsed as ints, floats or via custom parser functions. Return a
    pair of `Counter` objects, the first mapping parser names to the number of
    strings successfully parsed, the second mapping parser names to the
    number of errors. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 'aaa', 2],
        ...          ['B', u'2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', '3.7', 9.0],
        ...          ['E', 42]]
        >>> counter, errors = etl.parsecounter(table, 'bar')
        >>> counter
        Counter({'float': 3, 'int': 2})
        >>> errors
        Counter({'int': 2, 'float': 1})

    The `field` argument can be a field name or index (starting from zero).

    r   r   )	r/   r1   r0   dictr   keysr   r   items)	r   r   parsersr"   errorsnr   nameparserr   r   r   parsecounter   s    

rD   c                 C   s   t | ||dS )a  
    Count the number of `str` or `unicode` values that can be parsed as ints,
    floats or via custom parser functions. Return a table mapping parser names
    to the number of values successfully parsed and the number of errors. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 'aaa', 2],
        ...          ['B', u'2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', '3.7', 9.0],
        ...          ['E', 42]]
        >>> etl.parsecounts(table, 'bar')
        +---------+-------+--------+
        | type    | count | errors |
        +=========+=======+========+
        | 'float' |     3 |      1 |
        +---------+-------+--------+
        | 'int'   |     2 |      2 |
        +---------+-------+--------+

    The `field` argument can be a field name or index (starting from zero).

    )r?   )ParseCountsView)r   r   r?   r   r   r   parsecounts   s   rF   c                   @   s,   e Zd ZdefdefffddZdd ZdS )rE   r;   r   c                 C   s,   || _ || _t|ttfrt|}|| _d S r&   )r   r   r/   r1   r0   r<   r?   )r'   r   r   r?   r   r   r   r(      s
   
zParseCountsView.__init__c                 c   sD    t | j| j| j\}}dV  | D ]\}}|||| fV  qd S )N)typer)   r@   )rD   r   r   r?   r2   )r'   r"   r@   itemrA   r   r   r   r6      s   zParseCountsView.__iter__N)r8   r9   r:   r;   r   r(   r6   r   r   r   r   rE      s    rE   c              	   C   sD   t  }t| |D ]}z||jj  d7  < W q ty   Y qw |S )a  
    Count the number of values found for each Python type.

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', u'2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', u'xyz', 9.0],
        ...          ['E', 42]]
        >>> etl.typecounter(table, 'foo')
        Counter({'str': 5})
        >>> etl.typecounter(table, 'bar')
        Counter({'str': 3, 'int': 2})
        >>> etl.typecounter(table, 'baz')
        Counter({'str': 2, 'int': 1, 'float': 1, 'NoneType': 1})

    The `field` argument can be a field name or index (starting from zero).

    r   )r   r   	__class__r8   r    )r   r   r"   r   r   r   r   typecounter  s   rJ   c                 C   s
   t | |S )a  
    Count the number of values found for each Python type and return a table
    mapping class names to counts and frequencies. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          [b'A', 1, 2],
        ...          [b'B', '2', b'3.4'],
        ...          ['B', '3', '7.8', True],
        ...          ['D', u'xyz', 9.0],
        ...          ['E', 42]]
        >>> etl.typecounts(table, 'foo')
        +---------+-------+-----------+
        | type    | count | frequency |
        +=========+=======+===========+
        | 'str'   |     3 |       0.6 |
        +---------+-------+-----------+
        | 'bytes' |     2 |       0.4 |
        +---------+-------+-----------+

        >>> etl.typecounts(table, 'bar')
        +-------+-------+-----------+
        | type  | count | frequency |
        +=======+=======+===========+
        | 'str' |     3 |       0.6 |
        +-------+-------+-----------+
        | 'int' |     2 |       0.4 |
        +-------+-------+-----------+

        >>> etl.typecounts(table, 'baz')
        +------------+-------+-----------+
        | type       | count | frequency |
        +============+=======+===========+
        | 'int'      |     1 |       0.2 |
        +------------+-------+-----------+
        | 'bytes'    |     1 |       0.2 |
        +------------+-------+-----------+
        | 'str'      |     1 |       0.2 |
        +------------+-------+-----------+
        | 'float'    |     1 |       0.2 |
        +------------+-------+-----------+
        | 'NoneType' |     1 |       0.2 |
        +------------+-------+-----------+

    The `field` argument can be a field name or index (starting from zero).

    )TypeCountsViewr   r   r   r   r   
typecounts&  s   
1rM   c                   @   s   e Zd Zdd Zdd ZdS )rK   c                 C   s   || _ || _d S r&   rL   )r'   r   r   r   r   r   r(   _  s   
zTypeCountsView.__init__c                 c   s^    t | j| j}dV  | }tdd |D }|D ]}|d |d t|d | fV  qd S )N)rG   r)   r*   c                 s   r+   r   r   r,   r   r   r   r   g  r.   z*TypeCountsView.__iter__.<locals>.<genexpr>r   r   )rJ   r   r   r2   r   r   )r'   r"   r5   r   r-   r   r   r   r6   c  s   "zTypeCountsView.__iter__Nr7   r   r   r   r   rK   ]  s    rK   c                 C   sB   t dd}t }t| |D ]}t||}||  d7  < q|S )zp
    Profile string patterns in the given field, returning a :class:`dict`
    mapping patterns to counts.

    >ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789>AAAAAAAAAAAAAAAAAAAAAAAAAAaaaaaaaaaaaaaaaaaaaaaaaaaa9999999999r   )r   r   r   str	translate)r   r   transr"   r   pr   r   r   stringpatterncounterl  s   rT   c                    sN   t | |}dg}| }tdd |D   fdd|D }|| t|S )a  
    Profile string patterns in the given field, returning a table of patterns,
    counts and frequencies. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['Mr. Foo', '123-1254'],
        ...          ['Mrs. Bar', '234-1123'],
        ...          ['Mr. Spo', '123-1254'],
        ...          [u'Mr. Baz', u'321 1434'],
        ...          [u'Mrs. Baz', u'321 1434'],
        ...          ['Mr. Quux', '123-1254-XX']]
        >>> etl.stringpatterns(table, 'foo')
        +------------+-------+---------------------+
        | pattern    | count | frequency           |
        +============+=======+=====================+
        | 'Aa. Aaa'  |     3 |                 0.5 |
        +------------+-------+---------------------+
        | 'Aaa. Aaa' |     2 |  0.3333333333333333 |
        +------------+-------+---------------------+
        | 'Aa. Aaaa' |     1 | 0.16666666666666666 |
        +------------+-------+---------------------+

        >>> etl.stringpatterns(table, 'bar')
        +---------------+-------+---------------------+
        | pattern       | count | frequency           |
        +===============+=======+=====================+
        | '999-9999'    |     3 |                 0.5 |
        +---------------+-------+---------------------+
        | '999 9999'    |     2 |  0.3333333333333333 |
        +---------------+-------+---------------------+
        | '999-9999-AA' |     1 | 0.16666666666666666 |
        +---------------+-------+---------------------+

    )patternr)   r*   c                 s   r+   r   r   r,   r   r   r   r     r.   z!stringpatterns.<locals>.<genexpr>c                    s*   g | ]}|d  |d t |d   fqS )r   r   )r   r,   r   r   r   
<listcomp>  s   * z"stringpatterns.<locals>.<listcomp>)rT   r2   r   extendr   )r   r   r"   outputcntsr   rV   r   stringpatterns  s   
%
r[   c                 C   sD   t  }t| D ]}|t|  d7  < qdg}||  t|S )a  
    Report on row lengths found in the table. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', '2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', 'xyz', 9.0],
        ...          ['E', None],
        ...          ['F', 9]]
        >>> etl.rowlengths(table)
        +--------+-------+
        | length | count |
        +========+=======+
        |      3 |     3 |
        +--------+-------+
        |      2 |     2 |
        +--------+-------+
        |      4 |     1 |
        +--------+-------+

    Useful for finding potential problems in data files.

    r   )lengthr)   )r   r
   r3   rX   r2   r   )r   r"   rowrY   r   r   r   
rowlengths  s   r^   r&   )
__future__r   r   r   collectionsr   petl.compatr   r   petl.util.baser   r	   r
   r   r   r   r#   r%   r$   r;   r   rD   rF   rE   rJ   rM   rK   rT   r[   r^   r   r   r   r   <module>   s:    
+,4.
#