o
    ;D©i®<  ã                   @   s  d dl mZmZmZ d dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ 		d dd	„Zee_G d
d„ deƒZdd„ Z		d dd„Zee_G dd„ deƒZdd„ Z		d!dd„Zee_G dd„ deƒZdd„ Z		d"dd„Zee_G dd„ deƒZdd„ Zee_dS )#é    )Úabsolute_importÚprint_functionÚdivisionN)Ú	text_type)ÚTableÚ	asindicesÚ
itervalues)ÚsortFTc                 C   ó   t | |||||dS )a—  
    Select rows with duplicate values under a given key (or duplicate
    rows where no key is given). E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2.0],
        ...           ['B', 2, 3.4],
        ...           ['D', 6, 9.3],
        ...           ['B', 3, 7.8],
        ...           ['B', 2, 12.3],
        ...           ['E', None, 1.3],
        ...           ['D', 4, 14.5]]
        >>> table2 = etl.duplicates(table1, 'foo')
        >>> table2
        +-----+-----+------+
        | foo | bar | baz  |
        +=====+=====+======+
        | 'B' |   2 |  3.4 |
        +-----+-----+------+
        | 'B' |   3 |  7.8 |
        +-----+-----+------+
        | 'B' |   2 | 12.3 |
        +-----+-----+------+
        | 'D' |   6 |  9.3 |
        +-----+-----+------+
        | 'D' |   4 | 14.5 |
        +-----+-----+------+

        >>> # compound keys are supported
        ... table3 = etl.duplicates(table1, key=['foo', 'bar'])
        >>> table3
        +-----+-----+------+
        | foo | bar | baz  |
        +=====+=====+======+
        | 'B' |   2 |  3.4 |
        +-----+-----+------+
        | 'B' |   2 | 12.3 |
        +-----+-----+------+
        
    If `presorted` is True, it is assumed that the data are already sorted by
    the given key, and the `buffersize`, `tempdir` and `cache` arguments are 
    ignored. Otherwise, the data are sorted, see also the discussion of the
    `buffersize`, `tempdir` and `cache` arguments under the
    :func:`petl.transform.sorts.sort` function.
    
    See also :func:`petl.transform.dedup.unique` and
    :func:`petl.transform.dedup.distinct`.
    
    ©ÚkeyÚ	presortedÚ
buffersizeÚtempdirÚcache)ÚDuplicatesView©Útabler   r   r   r   r   © r   úM/var/www/Datamplify/venv/lib/python3.10/site-packages/petl/transform/dedup.pyÚ
duplicates   s   5ÿr   c                   @   ó"   e Zd Z		ddd„Zdd„ ZdS )	r   NFTc                 C   ó*   |r|| _ n
t|||||d| _ || _d S ©N)r   r   r   ©Úsourcer	   r   ©Úselfr   r   r   r   r   r   r   r   r   Ú__init__J   ó   ÿ
zDuplicatesView.__init__c                 C   ó   t | j| jƒS ©N)Úiterduplicatesr   r   ©r   r   r   r   Ú__iter__S   ó   zDuplicatesView.__iter__©NFNNT©Ú__name__Ú
__module__Ú__qualname__r   r$   r   r   r   r   r   H   ó
    
ÿ	r   c                 c   sÒ    t | ƒ}zt|ƒ}W n ty   |d u rY d S g }Y nw t|ƒV  |d u r.tt|ƒƒ}nt||ƒ}tj|Ž }d }d}|D ](}|d u rG|}q>||ƒ}	||ƒ}
|	|
krb|s\t|ƒV  d}t|ƒV  nd}|}q>d S ©NFT©	ÚiterÚnextÚStopIterationÚtupleÚrangeÚlenr   ÚoperatorÚ
itemgetter)r   r   ÚitÚhdrÚindicesÚgetkeyÚpreviousÚprevious_yieldedÚrowÚkprevÚkcurrr   r   r   r"   W   s:   €ý



òr"   c                 C   r
   )a  
    Select rows with unique values under a given key (or unique rows
    if no key is given). E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2],
        ...           ['B', '2', '3.4'],
        ...           ['D', 'xyz', 9.0],
        ...           ['B', u'3', u'7.8'],
        ...           ['B', '2', 42],
        ...           ['E', None, None],
        ...           ['D', 4, 12.3],
        ...           ['F', 7, 2.3]]
        >>> table2 = etl.unique(table1, 'foo')
        >>> table2
        +-----+------+------+
        | foo | bar  | baz  |
        +=====+======+======+
        | 'A' |    1 |    2 |
        +-----+------+------+
        | 'E' | None | None |
        +-----+------+------+
        | 'F' |    7 |  2.3 |
        +-----+------+------+
        
    If `presorted` is True, it is assumed that the data are already sorted by
    the given key, and the `buffersize`, `tempdir` and `cache` arguments are
    ignored. Otherwise, the data are sorted, see also the discussion of the
    `buffersize`, `tempdir` and `cache` arguments under the
    :func:`petl.transform.sorts.sort` function.

    See also :func:`petl.transform.dedup.duplicates` and
    :func:`petl.transform.dedup.distinct`.
    
    r   )Ú
UniqueViewr   r   r   r   Úuniqueƒ   s   'ÿr@   c                   @   r   )	r?   NFTc                 C   r   r   r   r   r   r   r   r   ³   r   zUniqueView.__init__c                 C   r    r!   )Ú
iteruniquer   r   r#   r   r   r   r$   ¼   r%   zUniqueView.__iter__r&   r'   r   r   r   r   r?   ±   r+   r?   c                 c   sØ    t | ƒ}zt|ƒ}W n
 ty   Y d S w t|ƒV  |d u r&tt|ƒƒ}nt||ƒ}tj|Ž }zt|ƒ}W n
 ty@   Y d S w ||ƒ}d}|D ]}	||	ƒ}
|
|k}|r\|r\t|ƒV  |	}|
}|}qI|rj|V  d S d S )NTr-   )r   r   r6   r7   r8   r9   ÚprevÚprev_keyÚprev_comp_neÚcurrÚcurr_keyÚcurr_comp_ner   r   r   rA   À   s<   €ÿ


ÿ

ÿrA   c	           	      C   s   t | ||||||||d	S )aÇ  
    Select rows with the same key value but differing in some other field.
    E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2.7],
        ...           ['B', 2, None],
        ...           ['D', 3, 9.4],
        ...           ['B', None, 7.8],
        ...           ['E', None],
        ...           ['D', 3, 12.3],
        ...           ['A', 2, None]]
        >>> table2 = etl.conflicts(table1, 'foo')
        >>> table2
        +-----+-----+------+
        | foo | bar | baz  |
        +=====+=====+======+
        | 'A' |   1 |  2.7 |
        +-----+-----+------+
        | 'A' |   2 | None |
        +-----+-----+------+
        | 'D' |   3 |  9.4 |
        +-----+-----+------+
        | 'D' |   3 | 12.3 |
        +-----+-----+------+
        
    Missing values are not considered conflicts. By default, `None` is treated
    as the missing value, this can be changed via the `missing` keyword 
    argument.

    One or more fields can be ignored when determining conflicts by providing
    the `exclude` keyword argument. Alternatively, fields to use when
    determining conflicts can be specified explicitly with the `include`
    keyword argument. This provides a simple mechanism for analysing the
    source of conflicting rows from multiple tables, e.g.::

        >>> table1 = [['foo', 'bar'], [1, 'a'], [2, 'b']]
        >>> table2 = [['foo', 'bar'], [1, 'a'], [2, 'c']]
        >>> table3 = etl.cat(etl.addfield(table1, 'source', 1),
        ...                  etl.addfield(table2, 'source', 2))
        >>> table4 = etl.conflicts(table3, key='foo', exclude='source')
        >>> table4
        +-----+-----+--------+
        | foo | bar | source |
        +=====+=====+========+
        |   2 | 'b' |      1 |
        +-----+-----+--------+
        |   2 | 'c' |      2 |
        +-----+-----+--------+

    If `presorted` is True, it is assumed that the data are already sorted by
    the given key, and the `buffersize`, `tempdir` and `cache` arguments are
    ignored. Otherwise, the data are sorted, see also the discussion of the
    `buffersize`, `tempdir` and `cache` arguments under the
    :func:`petl.transform.sorts.sort` function.
    
    )ÚmissingÚexcludeÚincluder   r   r   r   )ÚConflictsView)	r   r   rH   rJ   rI   r   r   r   r   r   r   r   Ú	conflictsì   s   
=þrL   c                   @   r   )	rK   NFTc
           
      C   s<   |r|| _ n
t|||||	d| _ || _|| _|| _|| _d S r   )r   r	   r   rH   rI   rJ   )
r   r   r   rH   rI   rJ   r   r   r   r   r   r   r   r   3  s   ÿ
zConflictsView.__init__c                 C   s   t | j| j| j| j| jƒS r!   )Úiterconflictsr   r   rH   rI   rJ   r#   r   r   r   r$   ?  s   ÿzConflictsView.__iter__©NNNFNNTr'   r   r   r   r   rK   1  s
    
ÿrK   c                 c   sR   |rt |ttfƒs|f}|rt |ttfƒs|f}|r|rd }t| ƒ}zt|ƒ}W n
 ty3   Y d S w ttt|ƒƒ}t|ƒV  t||ƒ}t	j
|Ž }	d }
d}|D ]V}|
d u rY|}
qP|	|
ƒ}|	|ƒ}||kr¢d}t|
||ƒD ]#\}}}|rx||vs‚|r~||v s‚|s|s|||fvr||krd} nqm|r¡|sœt|
ƒV  d}t|ƒV  nd}|}
qPd S r,   )Ú
isinstanceÚlistr1   r.   r/   r0   Úmapr   r   r4   r5   Úzip)r   r   rH   rI   rJ   r6   r7   Úfldsr8   r9   r:   r;   r<   r=   r>   ÚconflictÚxÚyÚfr   r   r   rM   D  s`   €ÿ


ÿÿÿ€

€èrM   c              	   C   s   t | ||||||dS )a  
    Return only distinct rows in the table.

    If the `count` argument is not None, it will be used as the name for an
    additional field, and the values of the field will be the number of
    duplicate rows.

    If the `key` keyword argument is passed, the comparison is done on the
    given key instead of the full row.

    See also :func:`petl.transform.dedup.duplicates`,
    :func:`petl.transform.dedup.unique`,
    :func:`petl.transform.reductions.groupselectfirst`,
    :func:`petl.transform.reductions.groupselectlast`.

    )r   Úcountr   r   r   r   )ÚDistinctView)r   r   rX   r   r   r   r   r   r   r   Údistinct~  s   
ÿrZ   c                   @   s"   e Zd Z		ddd„Zdd„ ZdS )	rY   NFTc                 C   s0   |r|| _ n
t|||||d| _ || _|| _d S )N)r   r   r   r   )r   r	   r   rX   )r   r   r   rX   r   r   r   r   r   r   r   r   ™  s   ÿ
zDistinctView.__init__c                 c   s&   t | jƒ}zt|ƒ}W n
 ty   Y d S w | jd u r#tt|ƒƒ}nt|| jƒ}tj	|Ž }t
ƒ }| jrvt|ƒ| jf }|V  |}d}|D ]&}||u rN|}qE||ƒ}	||ƒ}
|	|
kr_|d7 }qEt|ƒ|f V  d}|}qEt|ƒ|f V  d S t|ƒV  |}|D ]}||ƒ}||krŽt|ƒV  |}qd S )Né   )r.   r   r/   r0   r   r2   r3   r   r4   r5   ÚobjectrX   r1   )r   r6   r7   r8   r9   ÚINITr:   Ún_dupr<   r=   r>   Úprevious_keysÚkeysr   r   r   r$   £  sH   €
ÿ




üzDistinctView.__iter__©NNFNNTr'   r   r   r   r   rY   ˜  s
    
ÿ
rY   c                 C   s2   t ƒ }t| |ƒD ]}||v r dS | |¡ qdS )a   
    Return True if there are no duplicate values for the given field(s),
    otherwise False. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b'],
        ...           ['b', 2],
        ...           ['c', 3, True]]
        >>> etl.isunique(table1, 'foo')
        False
        >>> etl.isunique(table1, 'bar')
        True

    The `field` argument can be a single field name or index (starting from
    zero) or a tuple of field names and/or indexes.

    FT)Úsetr   Úadd)r   ÚfieldÚvalsÚvr   r   r   ÚisuniqueÓ  s   rg   r&   rN   ra   )Ú
__future__r   r   r   r4   Úpetl.compatr   Úpetl.util.baser   r   r   Úpetl.transform.sortsr	   r   r   r"   r@   r?   rA   rL   rK   rM   rZ   rY   rg   r   r   r   r   Ú<module>   s<    
ÿ9,
ÿ+,
ÿB:
ÿ;