o
    ;DiH                     @   sZ  d dl mZmZmZ d dlZd dlZd dlmZ d dlZd dl	Z	d dl
mZ d dlZd dlmZmZmZ d dlmZ d dlmZ d dlmZmZ e	eZejZejZejZ			d%d
dZee_dd ZG dd dedddgZ d&ddZ!d'ddZ"d'ddZ#G dd deZ$G dd de%Z&dd Z'e'e_'G dd  d eZ(d!d" Z)d(d#d$Z*e*e_*dS ))    )absolute_importprint_functiondivisionN)NamedTemporaryFile)
namedtuple)picklenext	text_type)comparable_itemgetter)Table	asindicesFTc                 C   s   t | |||||dS )a	  
    Sort the table. Field names or indices (from zero) can be used to specify
    the key. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['C', 2],
        ...           ['A', 9],
        ...           ['A', 6],
        ...           ['F', 1],
        ...           ['D', 10]]
        >>> table2 = etl.sort(table1, 'foo')
        >>> table2
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   9 |
        +-----+-----+
        | 'A' |   6 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+
        | 'D' |  10 |
        +-----+-----+
        | 'F' |   1 |
        +-----+-----+

        >>> # sorting by compound key is supported
        ... table3 = etl.sort(table1, key=['foo', 'bar'])
        >>> table3
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   6 |
        +-----+-----+
        | 'A' |   9 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+
        | 'D' |  10 |
        +-----+-----+
        | 'F' |   1 |
        +-----+-----+

        >>> # if no key is specified, the default is a lexical sort
        ... table4 = etl.sort(table1)
        >>> table4
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   6 |
        +-----+-----+
        | 'A' |   9 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+
        | 'D' |  10 |
        +-----+-----+
        | 'F' |   1 |
        +-----+-----+

    The `buffersize` argument should be an `int` or `None`.

    If the number of rows in the table is less than `buffersize`, the table
    will be sorted in memory. Otherwise, the table is sorted in chunks of
    no more than `buffersize` rows, each chunk is written to a temporary file,
    and then a merge sort is performed on the temporary files.

    If `buffersize` is `None`, the value of
    `petl.config.sort_buffersize` will be used. By default this is
    set to 100000 rows, but can be changed, e.g.::

        >>> import petl.config
        >>> petl.config.sort_buffersize = 500000

    If `petl.config.sort_buffersize` is set to `None`, this forces
    all sorting to be done entirely in memory.

    By default the results of the sort will be cached, and so a second pass over
    the sorted table will yield rows from the cache and will not repeat the
    sort operation. To turn off caching, set the `cache` argument to `False`.

    keyreverse
buffersizetempdircache)SortView)tabler   r   r   r   r    r   M/var/www/Datamplify/venv/lib/python3.10/site-packages/petl/transform/sorts.pysort   s   
Vr   c              	   c   sl    t d|   t| d}z	 t|V  q ty   Y nw W d    n1 s)w   Y  t d|   d S )Nziterchunk, opening %srbTzend of iterchunk, closed %s)debugopenr   loadEOFError)fnfr   r   r   
_iterchunkv   s   r   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )_Keyedc                 C   s   | j |j kS Nr   selfotherr   r   r   __eq__      z_Keyed.__eq__c                 C   s   | j |j k S r!   r"   r#   r   r   r   __lt__   r'   z_Keyed.__lt__c                 C   s   | j |j kS r!   r"   r#   r   r   r   __le__   r'   z_Keyed.__le__c                 C   s   | j |j kS r!   r"   r#   r   r   r   __ne__   r'   z_Keyed.__ne__c                 C   s   | j |j kS r!   r"   r#   r   r   r   __gt__   r'   z_Keyed.__gt__c                 C   s   | j |j kS r!   r"   r#   r   r   r   __ge__   r'   z_Keyed.__ge__N)	__name__
__module____qualname__r&   r(   r)   r*   r+   r,   r   r   r   r   r       s    r    Keyedr   objc                 '   sV     du r|}t j| D ]}|V  qdS  fdd|D }t j| D ]}|jV  q"dS )a  Return a single iterator over the given iterables, sorted by the
    given `key` function, assuming the input iterables are already sorted by
    the same function. (I.e., the merge part of a general merge sort.) Uses
    :func:`heapq.merge` for the underlying implementation.Nc                    s   g | ]} fd d|D qS )c                 3   s    | ]
}t  ||V  qd S r!   )r    ).0r1   r"   r   r   	<genexpr>   s    z/_heapqmergesorted.<locals>.<listcomp>.<genexpr>r   )r2   iterabler"   r   r   
<listcomp>   s    z%_heapqmergesorted.<locals>.<listcomp>)heapqmerger1   )r   	iterableskeyed_iterableselementr   r"   r   _heapqmergesorted   s   

r;   c              	   g   s    |rt }nt}| durd| i}nt }t }t }|D ]}t|}zt|}	|| ||	 W q ty;   Y qw |rl||fi |}
|
V  ||
}z
t|| ||< W n tyg   ||= ||= Y nw |s>dS dS )a5  Return a single iterator over the given iterables, sorted by the
    given `key` function, assuming the input iterables are already sorted by
    the same function. (I.e., the merge part of a general merge sort.) Uses
    :func:`min` (or :func:`max` if ``reverse=True``) for the underlying
    implementation.Nr   )	maxmindictlistiterr   appendStopIterationindex)r   r   r8   opopkwargs	iterators	shortlistr4   itfirstnxtnextidxr   r   r   _shortlistmergesorted   s:   



rL   c                 G   s&   |rt | dg|R  S t| g|R  S )NT)rL   r;   )r   r   r8   r   r   r   _mergesorted   s   rM   c                   @   sB   e Zd Z		dddZdd Zdd	 Zd
d Zdd Zdd ZdS )r   NFTc                 C   sR   || _ || _|| _|d u rtj| _n|| _|| _|| _d | _d | _	d | _
d | _d S r!   )sourcer   r   configsort_buffersizer   r   r   	_hdrcache	_memcache
_filecache_getkey)r$   rN   r   r   r   r   r   r   r   r   __init__   s   

zSortView.__init__c                 C   s$   t d d | _d | _d | _d | _d S )Nzclear cache)r   rQ   rR   rS   rT   r$   r   r   r   
clearcache   s
   
zSortView.clearcachec                 C   sP   | j }| j}| j}| jr| jd ur|  S | jr!| jd ur!|  S | |||S r!   )	rN   r   r   r   rR   _iterfrommemcacherS   _iterfromfilecache_iternocache)r$   rN   r   r   r   r   r   __iter__   s   zSortView.__iter__c                 c   s0    t d t| jV  | jD ]}t|V  qd S )Nziterate from memory cache)r   tuplerQ   rR   )r$   rowr   r   r   rX      s   
zSortView._iterfrommemcachec                 c   s    | j }tttd|}td| t| jV  dd |D }t| j	| j
g|R  }z|D ]}t|V  q.W td ~~~td d S td ~~~td w )Nnameziterate from file cache: %rc                 S      g | ]}t |qS r   )r   )r2   r   r   r   r   r5         z/SortView._iterfromfilecache.<locals>.<listcomp>zattempt cleanup from generatorzexiting generator)rS   r?   mapoperator
attrgetterr   r\   rQ   rM   rT   r   )r$   	filecache	filenames
chunkitersrowsr]   r   r   r   rY     s*   

zSortView._iterfromfilecachec                 c   s   t d |   t|}zt|}W n ty%   |d u r!Y d S g }Y nw t|V  |d ur5t||}ntt|}t	| }t
t|d| j}|j||d | jd u s\t|| jk rx| jrlt d || _|| _|| _|D ]}	t|	V  qnd S g }
|rt| jddd*}t|j}t d|j  |D ]
}	tj|	|d	d
 q|  |
| W d    n1 sw   Y  t
t|d| j}|j||d |s|| jrt d || _|
| _|| _dd |
D }t||g|R  D ]}	t|	V  qd S )Nziterate without cacher   )r   r   zcaching memFwb)dirdeletemodezcreated temporary chunk file %s)protocolzcaching filesc                 S   s   g | ]}t |jqS r   )r   r^   r2   r   r   r   r   r5   e  s    z)SortView._iternocache.<locals>.<listcomp>)r   rW   r@   r   rB   r\   r   rangelenr
   r?   	itertoolsislicer   r   r   rQ   rR   rT   r   r   _NamedTempFileDeleteOnGCr^   r   dumpflushrA   rS   rM   )r$   rN   r   r   rH   hdrindicesgetkeyrg   r]   
chunkfilesr   wrapperrf   r   r   r   rZ     sh   

zSortView._iternocacheNFNNT)	r-   r.   r/   rU   rW   r[   rX   rY   rZ   r   r   r   r   r      s    
r   c                   @   s>   e Zd Zdd ZejejfddZdd Z	dd Z
d	d
 ZdS )rs   c                 C   s
   || _ d S r!   r^   )r$   r^   r   r   r   rU   l  s   
z!_NamedTempFileDeleteOnGC.__init__c              
   C   sZ   | j }z|d|  || W n ty$ } z	|d||f   d }~ww |d|  d S )Nzdeleting %szexception deleting %s: %sz
deleted %s)r^   	Exception)r$   unlinklogr^   er   r   r   rj   o  s   z_NamedTempFileDeleteOnGC.deletec                 C   s   |    d S r!   )rj   rV   r   r   r   __del__z  r'   z _NamedTempFileDeleteOnGC.__del__c                 C      | j S r!   r|   rV   r   r   r   __str__}     z _NamedTempFileDeleteOnGC.__str__c                 C   r   r!   r|   rV   r   r   r   __repr__  r   z!_NamedTempFileDeleteOnGC.__repr__N)r-   r.   r/   rU   osr~   loggerr   rj   r   r   r   r   r   r   r   rs   j  s    rs   c                  O   s   t | fi |S )a   
    Combine multiple input tables into one sorted output table. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['A', 9],
        ...           ['C', 2],
        ...           ['D', 10],
        ...           ['A', 6],
        ...           ['F', 1]]
        >>> table2 = [['foo', 'bar'],
        ...           ['B', 3],
        ...           ['D', 10],
        ...           ['A', 10],
        ...           ['F', 4]]
        >>> table3 = etl.mergesort(table1, table2, key='foo')
        >>> table3.lookall()
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   9 |
        +-----+-----+
        | 'A' |   6 |
        +-----+-----+
        | 'A' |  10 |
        +-----+-----+
        | 'B' |   3 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+
        | 'D' |  10 |
        +-----+-----+
        | 'D' |  10 |
        +-----+-----+
        | 'F' |   1 |
        +-----+-----+
        | 'F' |   4 |
        +-----+-----+

    If the input tables are already sorted by the given key, give
    ``presorted=True`` as a keyword argument.

    This function is equivalent to concatenating the input tables using
    :func:`cat` then sorting, however this function will typically be more
    efficient, especially if the input tables are presorted.

    Keyword arguments:

    key : string or tuple of strings, optional
        Field name or tuple of fields to sort by (defaults to `None` lexical
        sort)
    reverse : bool, optional
        `True` if sort in reverse (descending) order (defaults to `False`)
    presorted : bool, optional
        `True` if inputs are already sorted by the given key (defaults to
        `False`)
    missing : object
        Value to fill with when input tables have different fields (defaults to
        `None`)
    header : sequence of strings, optional
        Specify a fixed header for the output table
    buffersize : int, optional
        Limit the number of rows in memory per input table when inputs are not
        presorted

    )MergeSortView)tableskwargsr   r   r   	mergesort  s   Dr   c                   @   s$   e Zd Z			dddZdd ZdS )	r   NFTc
           
         sD   | _ |r	|| _n fdd|D | _|| _|| _| _d S )Nc              
      s    g | ]}t | d qS )r   )r   r2   tr   r   r   r   r   r   r   r5     s    
z*MergeSortView.__init__.<locals>.<listcomp>)r   r   missingheaderr   )
r$   r   r   r   	presortedr   r   r   r   r   r   r   r   rU     s   
zMergeSortView.__init__c                 C   s   t | j| j| j| j| jS r!   )itermergesortr   r   r   r   r   rV   r   r   r   r[     s   zMergeSortView.__iter__)NFFNNNNT)r-   r.   r/   rU   r[   r   r   r   r   r     s    
r   c              	   #   s    dd | D }g }|D ]}z	| t| W q ty%   | g  Y qw |d u rFt |D ]}ttt|D ]}	|	vrC |	 q8q/n|tV  fdd  fddt||D }
d }|d urot|}t	| }t
||g|
R  D ]}|V  qxd S )Nc                 S   r_   r   )r@   r   r   r   r   r5     r`   z!itermergesort.<locals>.<listcomp>c                 3   s    t tt|| D ]D zt fdd|D V  W q
 tyN   gt| }tD ]\}}z | |||< W q. tyF   Y q.w t|V  Y q
w d S )Nc                 3   s*    | ]}|v r  | nV  qd S r!   )rC   )r2   fo)_rowfldsr   r   r   r3     s    "z:itermergesort.<locals>._standardisedata.<locals>.<genexpr>)r?   ra   r	   r\   
IndexErrorrp   	enumeraterC   )rH   rv   ofsoutrowifi)r   )r   r   r   _standardisedata  s$   z'itermergesort.<locals>._standardisedatac                    s   g | ]
\}} ||qS r   r   )r2   rv   rH   )r   outhdrr   r   r5     s    )rA   r   rB   r?   ra   r	   r\   zipr   r
   rL   )sourcesr   r   r   r   itssrc_hdrsrH   rv   r   sitsrx   rw   r]   r   )r   r   r   r   r     s>   


r   c                 C   s   |r|rt j}n|r|st j}n	|rt j}nt j}t| }zdd t|D }W n ty3   g }Y nw |du rMt|}|D ]}|||sH dS |}q>dS tt	|| }	t|}|	|}
|D ]}|	|}|||
sl dS |}
q^dS )a  
    Return True if the table is ordered (i.e., sorted) by the given key. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['a', 1, True],
        ...           ['b', 3, True],
        ...           ['b', 2]]
        >>> etl.issorted(table1, key='foo')
        True
        >>> etl.issorted(table1, key='bar')
        False
        >>> etl.issorted(table1, key='foo', strict=True)
        False
        >>> etl.issorted(table1, key='foo', reverse=True)
        False

    c                 S   r_   r   )r	   rn   r   r   r   r5   @  r`   zissorted.<locals>.<listcomp>NFT)
rb   ltlegtger@   r   rB   r
   r   )r   r   r   strictrD   rH   r   prevcurrrx   prevkeycurrkeyr   r   r   issorted   s:   


r   r{   r!   )NF)NFF)+
__future__r   r   r   r   r6   tempfiler   rq   loggingcollectionsr   rb   petl.compatr   r   r	   petl.configrO   petl.comparisonr
   petl.util.baser   r   	getLoggerr-   r   warninginfor   r   r   r    r;   rL   rM   r   objectrs   r   r   r   r   r   r   r   r   <module>   sD    

Z


' G
=
5