o
    ;Di<                     @   s  d dl mZmZmZ d dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ 		d dd	Zee_G d
d deZdd Z		d ddZee_G dd deZdd Z		d!ddZee_G dd deZdd Z		d"ddZee_G dd deZdd Zee_dS )#    )absolute_importprint_functiondivisionN)	text_type)Table	asindices
itervalues)sortFTc                 C      t | |||||dS )a  
    Select rows with duplicate values under a given key (or duplicate
    rows where no key is given). E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2.0],
        ...           ['B', 2, 3.4],
        ...           ['D', 6, 9.3],
        ...           ['B', 3, 7.8],
        ...           ['B', 2, 12.3],
        ...           ['E', None, 1.3],
        ...           ['D', 4, 14.5]]
        >>> table2 = etl.duplicates(table1, 'foo')
        >>> table2
        +-----+-----+------+
        | foo | bar | baz  |
        +=====+=====+======+
        | 'B' |   2 |  3.4 |
        +-----+-----+------+
        | 'B' |   3 |  7.8 |
        +-----+-----+------+
        | 'B' |   2 | 12.3 |
        +-----+-----+------+
        | 'D' |   6 |  9.3 |
        +-----+-----+------+
        | 'D' |   4 | 14.5 |
        +-----+-----+------+

        >>> # compound keys are supported
        ... table3 = etl.duplicates(table1, key=['foo', 'bar'])
        >>> table3
        +-----+-----+------+
        | foo | bar | baz  |
        +=====+=====+======+
        | 'B' |   2 |  3.4 |
        +-----+-----+------+
        | 'B' |   2 | 12.3 |
        +-----+-----+------+
        
    If `presorted` is True, it is assumed that the data are already sorted by
    the given key, and the `buffersize`, `tempdir` and `cache` arguments are 
    ignored. Otherwise, the data are sorted, see also the discussion of the
    `buffersize`, `tempdir` and `cache` arguments under the
    :func:`petl.transform.sorts.sort` function.
    
    See also :func:`petl.transform.dedup.unique` and
    :func:`petl.transform.dedup.distinct`.
    
    key	presorted
buffersizetempdircache)DuplicatesViewtabler   r   r   r   r    r   M/var/www/Datamplify/venv/lib/python3.10/site-packages/petl/transform/dedup.py
duplicates   s   5r   c                   @   "   e Zd Z		dddZdd ZdS )	r   NFTc                 C   *   |r|| _ n
t|||||d| _ || _d S N)r   r   r   sourcer	   r   selfr   r   r   r   r   r   r   r   r   __init__J      
zDuplicatesView.__init__c                 C      t | j| jS N)iterduplicatesr   r   r   r   r   r   __iter__S      zDuplicatesView.__iter__NFNNT__name__
__module____qualname__r   r$   r   r   r   r   r   H   
    
	r   c                 c   s    t | }zt|}W n ty   |d u rY d S g }Y nw t|V  |d u r.tt|}nt||}tj| }d }d}|D ](}|d u rG|}q>||}	||}
|	|
krb|s\t|V  d}t|V  nd}|}q>d S NFT	iternextStopIterationtuplerangelenr   operator
itemgetter)r   r   ithdrindicesgetkeypreviousprevious_yieldedrowkprevkcurrr   r   r   r"   W   s:   



r"   c                 C   r
   )a  
    Select rows with unique values under a given key (or unique rows
    if no key is given). E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2],
        ...           ['B', '2', '3.4'],
        ...           ['D', 'xyz', 9.0],
        ...           ['B', u'3', u'7.8'],
        ...           ['B', '2', 42],
        ...           ['E', None, None],
        ...           ['D', 4, 12.3],
        ...           ['F', 7, 2.3]]
        >>> table2 = etl.unique(table1, 'foo')
        >>> table2
        +-----+------+------+
        | foo | bar  | baz  |
        +=====+======+======+
        | 'A' |    1 |    2 |
        +-----+------+------+
        | 'E' | None | None |
        +-----+------+------+
        | 'F' |    7 |  2.3 |
        +-----+------+------+
        
    If `presorted` is True, it is assumed that the data are already sorted by
    the given key, and the `buffersize`, `tempdir` and `cache` arguments are
    ignored. Otherwise, the data are sorted, see also the discussion of the
    `buffersize`, `tempdir` and `cache` arguments under the
    :func:`petl.transform.sorts.sort` function.

    See also :func:`petl.transform.dedup.duplicates` and
    :func:`petl.transform.dedup.distinct`.
    
    r   )
UniqueViewr   r   r   r   unique   s   'r@   c                   @   r   )	r?   NFTc                 C   r   r   r   r   r   r   r   r      r   zUniqueView.__init__c                 C   r    r!   )
iteruniquer   r   r#   r   r   r   r$      r%   zUniqueView.__iter__r&   r'   r   r   r   r   r?      r+   r?   c                 c   s    t | }zt|}W n
 ty   Y d S w t|V  |d u r&tt|}nt||}tj| }zt|}W n
 ty@   Y d S w ||}d}|D ]}	||	}
|
|k}|r\|r\t|V  |	}|
}|}qI|rj|V  d S d S )NTr-   )r   r   r6   r7   r8   r9   prevprev_keyprev_comp_necurrcurr_keycurr_comp_ner   r   r   rA      s<   




rA   c	           	      C   s   t | ||||||||d	S )a  
    Select rows with the same key value but differing in some other field.
    E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2.7],
        ...           ['B', 2, None],
        ...           ['D', 3, 9.4],
        ...           ['B', None, 7.8],
        ...           ['E', None],
        ...           ['D', 3, 12.3],
        ...           ['A', 2, None]]
        >>> table2 = etl.conflicts(table1, 'foo')
        >>> table2
        +-----+-----+------+
        | foo | bar | baz  |
        +=====+=====+======+
        | 'A' |   1 |  2.7 |
        +-----+-----+------+
        | 'A' |   2 | None |
        +-----+-----+------+
        | 'D' |   3 |  9.4 |
        +-----+-----+------+
        | 'D' |   3 | 12.3 |
        +-----+-----+------+
        
    Missing values are not considered conflicts. By default, `None` is treated
    as the missing value, this can be changed via the `missing` keyword 
    argument.

    One or more fields can be ignored when determining conflicts by providing
    the `exclude` keyword argument. Alternatively, fields to use when
    determining conflicts can be specified explicitly with the `include`
    keyword argument. This provides a simple mechanism for analysing the
    source of conflicting rows from multiple tables, e.g.::

        >>> table1 = [['foo', 'bar'], [1, 'a'], [2, 'b']]
        >>> table2 = [['foo', 'bar'], [1, 'a'], [2, 'c']]
        >>> table3 = etl.cat(etl.addfield(table1, 'source', 1),
        ...                  etl.addfield(table2, 'source', 2))
        >>> table4 = etl.conflicts(table3, key='foo', exclude='source')
        >>> table4
        +-----+-----+--------+
        | foo | bar | source |
        +=====+=====+========+
        |   2 | 'b' |      1 |
        +-----+-----+--------+
        |   2 | 'c' |      2 |
        +-----+-----+--------+

    If `presorted` is True, it is assumed that the data are already sorted by
    the given key, and the `buffersize`, `tempdir` and `cache` arguments are
    ignored. Otherwise, the data are sorted, see also the discussion of the
    `buffersize`, `tempdir` and `cache` arguments under the
    :func:`petl.transform.sorts.sort` function.
    
    )missingexcludeincluder   r   r   r   )ConflictsView)	r   r   rH   rJ   rI   r   r   r   r   r   r   r   	conflicts   s   
=rL   c                   @   r   )	rK   NFTc
           
      C   s<   |r|| _ n
t|||||	d| _ || _|| _|| _|| _d S r   )r   r	   r   rH   rI   rJ   )
r   r   r   rH   rI   rJ   r   r   r   r   r   r   r   r   3  s   
zConflictsView.__init__c                 C   s   t | j| j| j| j| jS r!   )iterconflictsr   r   rH   rI   rJ   r#   r   r   r   r$   ?  s   zConflictsView.__iter__NNNFNNTr'   r   r   r   r   rK   1  s
    
rK   c                 c   sR   |rt |ttfs|f}|rt |ttfs|f}|r|rd }t| }zt|}W n
 ty3   Y d S w ttt|}t|V  t||}t	j
| }	d }
d}|D ]V}|
d u rY|}
qP|	|
}|	|}||krd}t|
||D ]#\}}}|rx||vs|r~||v s|s|s|||fvr||krd} nqm|r|st|
V  d}t|V  nd}|}
qPd S r,   )
isinstancelistr1   r.   r/   r0   mapr   r   r4   r5   zip)r   r   rH   rI   rJ   r6   r7   fldsr8   r9   r:   r;   r<   r=   r>   conflictxyfr   r   r   rM   D  s`   




rM   c              	   C   s   t | ||||||dS )a  
    Return only distinct rows in the table.

    If the `count` argument is not None, it will be used as the name for an
    additional field, and the values of the field will be the number of
    duplicate rows.

    If the `key` keyword argument is passed, the comparison is done on the
    given key instead of the full row.

    See also :func:`petl.transform.dedup.duplicates`,
    :func:`petl.transform.dedup.unique`,
    :func:`petl.transform.reductions.groupselectfirst`,
    :func:`petl.transform.reductions.groupselectlast`.

    )r   countr   r   r   r   )DistinctView)r   r   rX   r   r   r   r   r   r   r   distinct~  s   
rZ   c                   @   s"   e Zd Z		dddZdd ZdS )	rY   NFTc                 C   s0   |r|| _ n
t|||||d| _ || _|| _d S )N)r   r   r   r   )r   r	   r   rX   )r   r   r   rX   r   r   r   r   r   r   r   r     s   
zDistinctView.__init__c                 c   s&   t | j}zt|}W n
 ty   Y d S w | jd u r#tt|}nt|| j}tj	| }t
 }| jrvt|| jf }|V  |}d}|D ]&}||u rN|}qE||}	||}
|	|
kr_|d7 }qEt||f V  d}|}qEt||f V  d S t|V  |}|D ]}||}||krt|V  |}qd S )N   )r.   r   r/   r0   r   r2   r3   r   r4   r5   objectrX   r1   )r   r6   r7   r8   r9   INITr:   n_dupr<   r=   r>   previous_keyskeysr   r   r   r$     sH   





zDistinctView.__iter__NNFNNTr'   r   r   r   r   rY     s
    

rY   c                 C   s2   t  }t| |D ]}||v r dS || qdS )a   
    Return True if there are no duplicate values for the given field(s),
    otherwise False. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b'],
        ...           ['b', 2],
        ...           ['c', 3, True]]
        >>> etl.isunique(table1, 'foo')
        False
        >>> etl.isunique(table1, 'bar')
        True

    The `field` argument can be a single field name or index (starting from
    zero) or a tuple of field names and/or indexes.

    FT)setr   add)r   fieldvalsvr   r   r   isunique  s   rg   r&   rN   ra   )
__future__r   r   r   r4   petl.compatr   petl.util.baser   r   r   petl.transform.sortsr	   r   r   r"   r@   r?   rA   rL   rK   rM   rZ   rY   rg   r   r   r   r   <module>   s<    
9,
+,
B:
;
