o
    8DiC                     @  s  d Z ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
mZ ddlmZ ddlmZmZ dd	lmZ ejr=dd
lmZ eddG dd dZeddG dd dZeddG dd dZeddG dd dZeddG dd dZejrddlmZ ejeeeeef Z		d?d@d$d%Z ej!ej"ej#ej$ej%ej&ej'ej(fZ)ej*fZ+G d&d' d'Z,dAd*d+Z-dBd-d.Z.dCd/d0Z/dDd3d4Z0dAd5d6Z1dEd=d>Z2dS )Fz)
.. include:: ../posts/sql_diff.md

----
    )annotationsN)defaultdict)	dataclass)heappopheappush)chain)Dialectexpressions)seq_get)DialectTypeT)frozenc                   @     e Zd ZU dZded< dS )Insertz+Indicates that a new node has been insertedexp.Expression
expressionN__name__
__module____qualname____doc____annotations__ r   r   E/var/www/Datamplify/venv/lib/python3.10/site-packages/sqlglot/diff.pyr         
 r   c                   @  r   )Removez0Indicates that an existing node has been removedr   r   Nr   r   r   r   r   r      r   r   c                   @  "   e Zd ZU dZded< ded< dS )MovezFIndicates that an existing node's position within the tree has changedr   sourcetargetNr   r   r   r   r   r   $      
 r   c                   @  r   )Updatez0Indicates that an existing node has been updatedr   r   r   Nr   r   r   r   r   r    ,   r   r    c                   @  r   )Keepz3Indicates that an existing node hasn't been changedr   r   r   Nr   r   r   r   r   r!   4   r   r!   )TFr   r   r   	matchings6t.List[t.Tuple[exp.Expression, exp.Expression]] | None
delta_onlyboolkwargst.Anyreturnt.List[Edit]c              	     sN  |pg }ddd}t |  }t | }dd	 |D }d
d	 |D }	t|t|kp6t|t|	kp6||	@ }
|
r=|  n| }|
rE| n|}zP|
ri|ri||t |  ||t |  fdd|D }ntt|t|D ]}t||_qrtdi |j	||||d}W |
st||D ]}d|_q|S |
st||D ]}d|_qw w )a  
    Returns the list of changes between the source and the target expressions.

    Examples:
        >>> diff(parse_one("a + b"), parse_one("a + c"))
        [
            Remove(expression=(COLUMN this: (IDENTIFIER this: b, quoted: False))),
            Insert(expression=(COLUMN this: (IDENTIFIER this: c, quoted: False))),
            Keep(
                source=(ADD this: ...),
                target=(ADD this: ...)
            ),
            Keep(
                source=(COLUMN this: (IDENTIFIER this: a, quoted: False)),
                target=(COLUMN this: (IDENTIFIER this: a, quoted: False))
            ),
        ]

    Args:
        source: the source expression.
        target: the target expression against which the diff should be calculated.
        matchings: the list of pre-matched node pairs which is used to help the algorithm's
            heuristics produce better results for subtrees that are known by a caller to be matching.
            Note: expression references in this list must refer to the same node objects that are
            referenced in the source / target trees.
        delta_only: excludes all `Keep` nodes from the diff.
        kwargs: additional arguments to pass to the ChangeDistiller instance.

    Returns:
        the list of Insert, Remove, Move, Update and Keep objects for each node in the source and the
        target expression trees. This list represents a sequence of steps needed to transform the source
        expression tree into the target one.
    	old_nodestuple[exp.Expression, ...]	new_nodesr)   t.Dict[int, exp.Expression]c                 S  s:   i }t t| t|D ]\}}t||_||t|< q|S N)zipreversedhash_hashid)r+   r-   node_mappingold_nodenew_noder   r   r   compute_node_mappingsl   s
   
z#diff.<locals>.compute_node_mappingsc                 S     h | ]}t |qS r   r4   .0nr   r   r   	<setcomp>z       zdiff.<locals>.<setcomp>c                 S  r9   r   r:   r;   r   r   r   r>   {   r?   c                   s(   g | ]\}} t | t | fqS r   r:   r<   stsource_mappingtarget_mappingr   r   
<listcomp>   s   ( zdiff.<locals>.<listcomp>)r#   r%   N)r+   r,   r-   r,   r)   r.   r   )
tuplewalklencopyr   r1   r2   r3   ChangeDistillerdiff)r   r   r#   r%   r'   r8   source_nodestarget_nodes
source_ids
target_idsrJ   source_copytarget_copynodeedit_scriptr   rC   r   rL   B   sF   (
rL   c                   @  sh   e Zd ZdZd)d*ddZ		d+d,ddZd-ddZd.ddZd/dd Zd/d!d"Z	d0d#d$Z
d1d'd(ZdS )2rK   a)  
    The implementation of the Change Distiller algorithm described by Beat Fluri and Martin Pinzger in
    their paper https://ieeexplore.ieee.org/document/4339230, which in turn is based on the algorithm by
    Chawathe et al. described in http://ilpubs.stanford.edu:8090/115/1/1995-46.pdf.
    333333?NffloatrB   dialectr   r)   Nonec                 C  s    || _ || _t| | _d S r/   )rV   rB   r   get_or_raise	generator_sql_generator)selfrV   rB   rX   r   r   r   __init__   s   zChangeDistiller.__init__Fr   r   r   r#   r$   r%   r&   r*   c                 C  s   |pg }dd |D }|| _ || _dd | j  D | _dd | j D | _t| jt| | _t| jt|  | _i | _	| 
 t| B }| t||S )Nc                 S  s   i | ]\}}t |t |qS r   r:   r@   r   r   r   
<dictcomp>   s    z(ChangeDistiller.diff.<locals>.<dictcomp>c                 S      i | ]}t |tst||qS r   
isinstanceIGNORED_LEAF_EXPRESSION_TYPESr4   r;   r   r   r   r_      
    
c                 S  r`   r   ra   r;   r   r   r   r_      rd   )_source_targetbfs_source_index_target_indexset_unmatched_source_nodesvalues_unmatched_target_nodes_bigram_histo_cache_compute_matching_setitems_generate_edit_scriptdict)r]   r   r   r#   r%   pre_matched_nodesmatching_setr   r   r   rL      s   zChangeDistiller.difft.Dict[int, int]c                 C  sB  g }| j D ]}|t| j|  q| jD ]}|t| j|  q| D ]x\}}| j| }| j| }	||	k}
t|t	r?|
r|
rh|j
}|	j
}|rK|r^|sO|s^|rg|rg|t|t|krg|t||	d n
|| ||	| tt|}tt|	}||kr|t||	 q&|s|t||	 q&|t||	 q&|S )Nr   r   )rk   appendr   rh   rm   r   ri   rp   rb   UPDATABLE_EXPRESSION_TYPESparentgetr4   r   extend_generate_move_editsrr   _get_non_expression_leavesr    r!   )r]   r#   r%   rT   removed_node_idinserted_node_idkept_source_node_idkept_target_node_idsource_nodetarget_nodeidentical_nodessource_parenttarget_parentsource_non_expression_leavestarget_non_expression_leavesr   r   r   rq      sP   



z%ChangeDistiller._generate_edit_scriptt.List[Move]c           	        s   dd t |D }dd t |D }tt|| fdd}g }|D ]}||vr>|| jvr>|t| j| | j |  d q"|S )Nc                 S     g | ]}t |qS r   r:   r<   er   r   r   rF     r?   z8ChangeDistiller._generate_move_edits.<locals>.<listcomp>c                 S  r   r   r:   r   r   r   r   rF     r?   c                   s     tt| |kS r/   )rz   rB   castint)lrr#   r   r   <lambda>  s    z6ChangeDistiller._generate_move_edits.<locals>.<lambda>rv   )_expression_only_argsrj   _lcsrk   rw   r   rh   ri   )	r]   r   r   r#   source_argstarget_argsargs_lcs
move_editsar   r   r   r|      s   z$ChangeDistiller._generate_move_editst.Set[t.Tuple[int, int]]c                   sN     }| } fdd j D } fdd j D }|D ]}|D ]}} j| } j| }t||rdd t|D dd t|D t	t
t
}	|	retfdd|D }
|
|	 }nd	}tt
t
d
kru jnd}|dks||kr || jkr|||f  j|  j| ||d   nq&q"|S )Nc                   $   i | ]}t | jv rt |d qS r/   )r4   rk   r;   r]   r   r   r_         z9ChangeDistiller._compute_matching_set.<locals>.<dictcomp>c                   r   r/   )r4   rm   r;   r   r   r   r_     r   c                 S  r9   r   r:   r<   r   r   r   r   r>   "  r?   z8ChangeDistiller._compute_matching_set.<locals>.<setcomp>c                 S  r9   r   r:   r   r   r   r   r>   #  r?   c                 3  s,    | ]\}}| v r|v rd ndV  qdS )   r   Nr   r@   )source_leaf_idstarget_leaf_idsr   r   	<genexpr>'  s
    
z8ChangeDistiller._compute_matching_set.<locals>.<genexpr>           g?g?)_compute_leaf_matching_setrJ   re   rg   rf   rh   ri   _is_same_type_get_expression_leavesmaxrI   summinrB   _dice_coefficientrV   addrk   removerm   pop)r]   leaves_matching_setrt   ordered_unmatched_source_nodesordered_unmatched_target_nodessource_node_idtarget_node_idr   r   max_leaves_numcommon_leaves_numleaf_similarity_score
adjusted_tr   )r]   r   r   r   ro     sD   





z%ChangeDistiller._compute_matching_setc           	   	   C  s   g }t t| j}t t| j}|D ](}|D ]#}t||r9| ||}|| jkr9t|| t|| t	|||f qqt
 }|rtt|\}}}}}t|| jv rrt|| jv rr|t|t|f | jt| | jt| |s@|S r/   )listr   re   rf   r   r   rV   r   _parent_similarity_scorerI   rj   r   r4   rk   rm   r   r   )	r]   candidate_matchingssource_expression_leavestarget_expression_leavessource_leaftarget_leafsimilarity_scorert   _r   r   r   r   ?  s:   



z*ChangeDistiller._compute_leaf_matching_setc           	      C  s   |  |}|  |}t| t|  }|s ||krdS dS d}t|t|@ }|D ]}|t|| || 7 }q,d| | S )Ng      ?r   r      )_bigram_histor   rl   rj   r   )	r]   r   r   source_histotarget_histototal_gramsoverlap_lenoverlapping_gramsgr   r   r   r   a  s   

z!ChangeDistiller._dice_coefficientr   t.DefaultDict[str, int]c                 C  s~   t || jv r| jt | S | j|}tdt|d }tt}t|D ]}||||d    d7  < q%|| jt |< |S )Nr   r   r   )	r4   rn   r\   generater   rI   r   r   range)r]   r   expression_strcountbigram_histoir   r   r   r   p  s   zChangeDistiller._bigram_histo)rU   rU   N)rV   rW   rB   rW   rX   r   r)   rY   NF)
r   r   r   r   r#   r$   r%   r&   r)   r*   )r#   ru   r%   r&   r)   r*   )r   r   r   r   r#   ru   r)   r   )r)   r   )r   r   r   r   r)   rW   )r   r   r)   r   )r   r   r   r   r^   rL   rq   r|   ro   r   r   r   r   r   r   r   rK      s    	

,

-
"rK   r   t.Iterator[exp.Expression]c                 c  sB    d}|   D ]}t|tsd}t|E d H  q|s| V  d S d S )NFT)iter_expressionsrb   rc   r   )r   has_child_exprsrS   r   r   r   r   ~  s   

r   t.Iterator[t.Tuple[str, t.Any]]c                 c  sN    | j  D ]\}}t|tjst|trtt|dtjrq||fV  qd S )Nr   )argsrp   rb   exp
Expressionr   r
   )r   argvaluer   r   r   r}     s   r}   c                 C  sT   t | t |u r(t| tjr| jd|jdkS t| tjr&| j|jkS dS dS )NsideTF)typerb   r   Joinr   rz   	Anonymousthisrv   r   r   r   r     s   r   t.Optional[exp.Expression]r   c                 C  s6   | d u s|d u st | t |urdS dt| j|j S )Nr   r   )r   r   ry   rv   r   r   r   r     s    r   c                 c  s    dd |   D E d H  d S )Nc                 s  s    | ]
}t |ts|V  qd S r/   )rb   rc   )r<   r   r   r   r   r     s    
z(_expression_only_args.<locals>.<genexpr>)r   )r   r   r   r   r     s   r   seq_at.Sequence[T]seq_bequalt.Callable[[T, T], bool]t.Sequence[t.Optional[T]]c                   s  t | }t |  fddt|d D }t|d D ]e}t d D ]\}|dks-|dkr4g || |< q#|| |d  ||d  rW||d  |d  | |d  g || |< q#t ||d  | t || |d  krs||d  | n|| |d  || |< q#q||   S )z)Calculates the longest common subsequencec                   s   g | ]	}d g d  qS )Nr   r   )r<   r   len_br   r   rF     s    z_lcs.<locals>.<listcomp>r   r   )rI   r   )r   r   r   len_a
lcs_resultr   jr   r   r   r     s   ,(r   r   )r   r   r   r   r#   r$   r%   r&   r'   r(   r)   r*   )r   r   r)   r   )r   r   r)   r   )r   r   r   r   r)   r&   )r   r   r   r   r)   r   )r   r   r   r   r   r   r)   r   )3r   
__future__r   typingrB   collectionsr   dataclassesr   heapqr   r   	itertoolsr   sqlglotr   r	   r   sqlglot.helperr
   TYPE_CHECKINGsqlglot.dialects.dialectr   r   r   r   r    r!   sqlglot._typingr"   UnionEditrL   AliasBooleanColumnDataTypeLambdaLiteralTableWindowrx   
Identifierrc   rK   r   r}   r   r   r   r   r   r   r   r   <module>   sZ    _ 
Q




	