o
    8Di8                     @  s
  d dl mZ d dlZd dlmZmZ d dlmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZmZmZ ejrSd d	lmZ d d
lmZ d-ddZ d.ddZ!d/ddZ"d0ddZ#d1ddZ$d1d d!Z%d1d"d#Z&d2d)d*Z'G d+d, d,eZ(dS )3    )annotationsN)exp
transforms)binary_from_functionbuild_formatted_timeis_parse_jsonpivot_column_namesrename_functrim_sqlunit_to_str)Hive)seq_getensure_list)	TokenType)
preprocessremove_unique_constraints'ctas_with_tmp_tables_to_create_tmp_view%move_schema_columns_to_partitioned_by)E)TypeAnnotatorselfSpark2.Generator
expressionexp.Mapreturnstrc                 C  s8   |j d}|j d}|r|s| dS | d||S )NkeysvaluesMAPMAP_FROM_ARRAYS)argsgetfunc)r   r   r   r    r#   P/var/www/Datamplify/venv/lib/python3.10/site-packages/sqlglot/dialects/spark2.py_map_sql   s
   
r%   to_type$t.Callable[[t.List], exp.Expression]c                   s    fddS )Nc                   s   t jt| dt j dS )Nr   )thisto)r   Castr   DataTypebuildr    r&   r#   r$   <lambda>*   s    z _build_as_cast.<locals>.<lambda>r#   r.   r#   r.   r$   _build_as_cast)   s   r0   exp.StrToDatec                 C  s2   |  |}|tjkr| d|jS | d|j|S )NTO_DATE)format_timer   DATE_FORMATr"   r(   )r   r   time_formatr#   r#   r$   _str_to_date-   s   

r6   exp.UnixToTimec                 C  s   |j d}|j}|d u r| ttd|tjjj	S |tj
jkr)| d|S |tj
jkr5| d|S |tj
jkrA| d|S tj|tdd|d}| d|S )	Nscalefrom_unixtimeTIMESTAMP_SECONDSTIMESTAMP_MILLISTIMESTAMP_MICROSPOW
   )r(   r   )r    r!   r(   sqlr   castr"   r+   Type	TIMESTAMP
UnixToTimeSECONDSMILLISMICROSDiv)r   r   r8   	timestampunix_secondsr#   r#   r$   _unix_to_time_sql4   s    rJ   exp.Expressionc                 C  sv   t | tjr9| jjdr9| jjd d }|jr9|jd  }tj| jt	dj
| j ddj|dddS | S )	a  
    Spark doesn't allow PIVOT aliases, so we need to remove them and possibly wrap a
    pivoted source in a subquery with the same alias to preserve the query's semantics.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT piv.x FROM tbl PIVOT (SUM(a) FOR b IN ('x')) piv")
        >>> print(_unalias_pivot(expr).sql(dialect="spark"))
        SELECT piv.x FROM (SELECT * FROM tbl PIVOT(SUM(a) FOR b IN ('x'))) AS piv
    pivotsr   alias*F)copy)rM   rO   r(   )
isinstancer   Fromr(   r    r!   rM   popreplaceselectfrom_rO   subquery)r   pivotrM   r#   r#   r$   _unalias_pivotE   s   rY   c                 C  s(   t | tjr| ddd | jD  | S )a  
    Spark doesn't allow the column referenced in the PIVOT's field to be qualified,
    so we need to unqualify it.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM tbl PIVOT (SUM(tbl.sales) FOR tbl.quarter IN ('Q1', 'Q2'))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="spark"))
        SELECT * FROM tbl PIVOT(SUM(tbl.sales) FOR quarter IN ('Q1', 'Q1'))
    fieldsc                 S  s   g | ]}t |qS r#   )r   unqualify_columns).0fieldr#   r#   r$   
<listcomp>l       z,_unqualify_pivot_columns.<locals>.<listcomp>)rQ   r   PivotsetrZ   )r   r#   r#   r$   _unqualify_pivot_columns_   s
   rb   c                 C  s*   t jt jdd}| jd d| | S )NparquetrP   
propertiesexpressions)r   FileFormatPropertyLiteralstringr    append)r   providerr#   r#   r$   temporary_storage_providerr   s   rk   r   r   r    target_type exp.DataType | exp.DataType.Typec          
      G  s   |  | g }|D ]}|j|}|dd t|D  q	d}d}|D ]}	|	tjjj	r1d}q$|	|r<d}|} n|	j
}q$| ||rMtjjj	 |S | |S )a  
    Infers the type of the expression according to the following rules:
    - If all args are of the same type OR any arg is of target_type, the expr is inferred as such
    - If any arg is of UNKNOWN type and none of target_type, the expr is inferred as UNKNOWN
    c                 s  s    | ]}|r|V  qd S Nr#   )r\   exprr#   r#   r$   	<genexpr>   s    z,_annotate_by_similar_args.<locals>.<genexpr>NFT)_annotate_argsr    r!   extendr   is_typer   r+   rA   UNKNOWNtype	_set_type)
r   r   rl   r    re   argarg_exprlast_datatypehas_unknownro   r#   r#   r$   _annotate_by_similar_argsy   s&   

r{   c                   @  sn   e Zd Zi ejejdd ejdd ejdd iZG dd dej	Z	G dd dej
Z
G d	d
 d
ejZdS )Spark2c                 C  s   |  |dS )Nr(   )_annotate_by_argsr   er#   r#   r$   r/      s    zSpark2.<lambda>c                 C  s   t | |dtjjjdS )Nre   rl   r{   r   r+   rA   TEXTr~   r#   r#   r$   r/      s    c                 C  s   t | |ddtjjjdS )Nr(   fill_patternr   r   r~   r#   r#   r$   r/      s    c                   @  s*   e Zd ZddgZi ejjdejiZdS )zSpark2.Tokenizer)zX'')zx'r   rB   N)	__name__
__module____qualname__HEX_STRINGSr   	TokenizerKEYWORDSr   TIMESTAMPTZr#   r#   r#   r$   r      s    
r   c                
   @  sT  e Zd ZdZi ejji dejj	dej
j	deddeddd	d
 ddd
 ddd
 ddd
 deddedddd
 deddejj	dejj	deejdeejdedejj	edd d
 ejj	d!d
 d"d
 d#d
 d$Zi ejjd%d
 d&d
 d'd
 d(d
 d)d
 d*d
 d+d
 d,d
 d-Zd8d0d1Zd9d5d6Zd7S ):zSpark2.ParserT	AGGREGATEAPPROX_PERCENTILEBOOLEANbooleanDATEdate
DATE_TRUNCc                 C  s    t jt| dt t| ddS )N   r   )r(   unit)r   TimestampTruncr   varr-   r#   r#   r$   r/      s    zSpark2.Parser.<lambda>
DAYOFMONTHc                 C     t jt jt| dddS Nr   rP   )r   
DayOfMonthTsOrDsToDater   r-   r#   r#   r$   r/          	DAYOFWEEKc                 C  r   r   )r   	DayOfWeekr   r   r-   r#   r#   r$   r/      r   	DAYOFYEARc                 C  r   r   )r   	DayOfYearr   r   r-   r#   r#   r$   r/      r   DOUBLEdoubleFLOATfloatFROM_UTC_TIMESTAMPc                 C  8   t jt jt| dpt jddt jjj|dt| ddS Nr    rP   dialectr   )r(   zone)r   
AtTimeZoner@   r   Varr+   rA   rB   r    r   r#   r#   r$   r/          INTintr   RLIKE	SHIFTLEFT
SHIFTRIGHTSTRINGrh   rH   c                 C  s(   t | dkrtd| S ttjd| S )Nr   rH   spark)lenr0   r   r   	StrToTimer-   r#   r#   r$   r/      s   c                 C  r   r   )r   FromTimeZoner@   r   r   r+   rA   rB   r   r#   r#   r$   r/      r   c                 C  s   t jt| dt| ddS )Nr   r   )r   r(   )r   	DateTruncr   r-   r#   r#   r$   r/      r   c                 C  r   r   )r   
WeekOfYearr   r   r-   r#   r#   r$   r/      r   )SLICErB   TO_TIMESTAMPTO_UNIX_TIMESTAMPTO_UTC_TIMESTAMPTRUNC
WEEKOFYEARc                 C  
   |  dS )N	BROADCAST_parse_join_hintr   r#   r#   r$   r/         
 c                 C  r   )NBROADCASTJOINr   r   r#   r#   r$   r/      r   c                 C  r   )NMAPJOINr   r   r#   r#   r$   r/      r   c                 C  r   )NMERGEr   r   r#   r#   r$   r/      r   c                 C  r   )NSHUFFLEMERGEr   r   r#   r#   r$   r/      r   c                 C  r   )N	MERGEJOINr   r   r#   r#   r$   r/      r   c                 C  r   )NSHUFFLE_HASHr   r   r#   r#   r$   r/      r   c                 C  r   )NSHUFFLE_REPLICATE_NLr   r   r#   r#   r$   r/      r   )r   r   r   r   r   r   r   r   r   "t.Optional[exp.Drop | exp.Command]c                 C  s"   |  ddo| jtj|  ddS )NDROPCOLUMNS)r(   kind)_match_text_seqr   r   Drop_parse_schemar   r#   r#   r$   _parse_drop_column   s   z Spark2.Parser._parse_drop_columnaggregationst.List[exp.Expression]t.List[str]c                 C  s   t |dkrg S t|ddS )Nr   r   r   )r   r   )r   r   r#   r#   r$   _pivot_column_names   s   z!Spark2.Parser._pivot_column_namesN)r   r   )r   r   r   r   )r   r   r   TRIM_PATTERN_FIRSTr   Parser	FUNCTIONSr   Reducefrom_arg_listApproxQuantiler0   Map
RegexpLiker   BitwiseLeftShiftBitwiseRightShift
ArraySlice	StrToUnixFUNCTION_PARSERSr   r   r#   r#   r#   r$   r      sz    	


0
r   c                      sN  e Zd ZdZdZdZi ejje	j
e	jjje	je	jjje	je	jjje	je	jjjiZi ejji e	jede	jdd e	jede	jede	jdd e	jede	jed	e	jeed
d ege	jede	jdd e	j ede	j!ede	j"dd e	j#ede	j$e%e&ge	j'dd e	j(ede	j)ede	j*e+e	j,e%e-ge	j.ede	j/dd e	j0e%e%j1e%j2e%j3e%j4ge	j5e6e	j7dd e	j8dd e	j9e:e	j;e<e	j=ede	j>ede	j?e%e%j@giZeAe	jB eAe	jC eAe	jD eAe	jE eAe	jF dZGdZHd*dd ZId+d, fd%d&ZJd- fd(d)ZK  ZLS ).r   TAPPROX_COUNT_DISTINCTc                 C  s   d|  |d dS )Nz
AGGREGATE(r(   z%, 0, (acc, x) -> acc + x, acc -> acc))r?   r~   r#   r#   r$   r/     s   zSpark2.Generator.<lambda>
ARRAY_JOINr   c                 C     |  d|j|jdS )Nr   r   r"   r(   r    r!   r~   r#   r#   r$   r/   
      r   r   c                 C  s
   t | tS rn   )r   rk   )r   r#   r#   r$   r/     s    	MAKE_DATEc                 C  s   |  d|jt|S )Nr   )r"   r(   r   r~   r#   r#   r$   r/         r   r   c                 C  s   d|  d|j dS )Nz((r   z
 % 7) + 1))r"   r(   r~   r#   r#   r$   r/     r_   r   c                 C  r   )Nr   r   r   r~   r#   r#   r$   r/      r   BOOL_ANDBOOL_ORr   c              	   C  s$   |  d|j|j|jd |jdS )NREGEXP_REPLACEreplacementposition)r"   r(   r   r    r!   r~   r#   r#   r$   r/   (  s    
c                 C  s   |  d|j| |S )Nr   )r"   r(   r3   r~   r#   r#   r$   r/   8  r_   c                 C  s   |  dt||jS )Nr   )r"   r   r(   r~   r#   r#   r$   r/   9  r   VAR_POPr   Fr   
exp.Structr   r   c                 C  s   ddl m} || |S )Nr   )	Generator)sqlglot.generatorr   
struct_sql)r   r   r   r#   r#   r$   r   K  s   zSpark2.Generator.struct_sqlNexp.Castsafe_prefixt.Optional[str]c                   s   |j }t|tjtjfo|jd }|jjdr8t|s |r8d| 	|d d}| 
d|r3||S |j |S t|rB| 
d|S ttj| j||dS )Nvariant_extractnestedr   r)   	FROM_JSONTO_JSON)r   )r(   rQ   r   JSONExtractJSONExtractScalarr    r!   r)   r   r?   r"   superr   r   cast_sql)r   r   r   rw   is_json_extractschema	__class__r#   r$   r  P  s   zSpark2.Generator.cast_sqlexp.FileFormatPropertyc                   s(   |j drt |S d|j  S )Nhive_formatzUSING )r    r!   r  fileformatproperty_sqlnameupper)r   r   r  r#   r$   r
  `  s   z'Spark2.Generator.fileformatproperty_sql)r   r   r   r   rn   )r   r   r   r   r   r   )r   r  r   r   )Mr   r   r   QUERY_HINTSNVL2_SUPPORTEDCAN_IMPLEMENT_ARRAY_ANYr   r   PROPERTIES_LOCATIONr   EngineProperty
PropertiesLocationUNSUPPORTEDAutoIncrementPropertyCharacterSetPropertyCollateProperty
TRANSFORMSApproxDistinctr	   ArraySumArrayToStringr   r   r   r   Creater   r   r   DateFromPartsr   r   r   DayOfWeekIsor   rR   r   rY   r   
LogicalAnd	LogicalOrr   r%   r`   rb   r   RegexpReplaceSelecteliminate_qualifyeliminate_distinct_onunnest_to_explodeany_to_exists	StrToDater6   r   r   Trimr
   rC   rJ   VariancePopr   WithinGroup#remove_within_group_for_percentilesrS   	ArraySortILikeLeftMonthsBetweenRightWRAP_DERIVED_VALUESCREATE_FUNCTION_RETURN_ASr   r  r
  __classcell__r#   r#   r  r$   r      s    















 
!





?
r   N)r   r   r   r   
ANNOTATORSr   	SubstringConcatPadr   r   r   r#   r#   r#   r$   r|      s    


Ir|   )r   r   r   r   r   r   )r&   r   r   r'   )r   r   r   r1   r   r   )r   r   r   r7   r   r   )r   rK   r   rK   )
r   r   r   r   r    r   rl   rm   r   r   ))
__future__r   typingtsqlglotr   r   sqlglot.dialects.dialectr   r   r   r   r	   r
   r   sqlglot.dialects.hiver   sqlglot.helperr   r   sqlglot.tokensr   sqlglot.transformsr   r   r   r   TYPE_CHECKINGsqlglot._typingr    sqlglot.optimizer.annotate_typesr   r%   r0   r6   rJ   rY   rb   rk   r{   r|   r#   r#   r#   r$   <module>   s(    $	








 