o
    ?Di&#                     @   s  d dl mZmZmZmZmZmZ d dlZer"ddlm	Z	 d dl
mZ ddlmZ ddlmZmZmZ dd	lmZ ddlmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ d dlZddl m!Z!m"Z" ddl#T dee defddZ$G dd dZ%dgZ&dS )    )OptionalListAnyUnionIterableTYPE_CHECKINGN   Catalog)	DataFrame   ContributionsAcceptedError)
StructType
AtomicTypeDataType)	SparkConf)RuntimeConfigDataFrameReader)SparkContextUDFRegistrationDataStreamReader)PySparkTypeErrorPySparkValueError)*dataschemac                    sJ   ddl m  g }| D ]} fddt|dd |D D }|| q
|S )Nr   Valuec                    s   g | ]
\}} ||j qS  )duckdb_type).0xdtyper    r"   ^/var/www/Datamplify/venv/lib/python3.10/site-packages/duckdb/experimental/spark/sql/session.py
<listcomp>*   s    z,_combine_data_and_schema.<locals>.<listcomp>c                 S   s   g | ]}|j qS r"   )dataType)r$   yr"   r"   r'   r(   *   s    )duckdbr!   zipappend)r   r   new_datarownew_rowr"   r    r'   _combine_data_and_schema%   s   "r1   c                   @   s  e Zd ZdefddZdeee df defddZ	dddefd	d
Z
			d:dedee f deeeee f  dee dedef
ddZd;ddZ			d<dedee dedee ddf
ddZdededefddZd=d d!Zd"edefd#d$Zd;d%d&Zed>d(d)Zedefd*d+Zedefd,d-Zedefd.d/Z edefd0d1Z!edefd2d3Z"ede#fd4d5Z$edefd6d7Z%G d8d9 d9Z&e& Z'dS )?SparkSessioncontextc                 C   s   |j | _|| _t| j| _d S N)
connectionconn_contextr   _confselfr3   r"   r"   r'   __init__0   s   zSparkSession.__init__r   PandasDataFramereturnc                 C   s   zdd l }d}W n ty   d}Y nw |r7t||jr7dt  }| j|| t| jd| d| S dd }t|t	sDt	|}|| d	t
fd
d}||}dd }||}	| jj||	d}
t|
| S )Nr   TFpyspark_pandas_df_zselect * from ""c                 S   sx   t | dkrd S t | d }t| dd  D ]#\}}t |}||kr#qtdd| d|d  t|t|ddd S )Nr   r   LENGTH_SHOULD_BE_THE_SAMEr   )arg1arg2arg1_lengtharg2_lengtherror_classmessage_parameters)len	enumerater   str)tuplesexpected_lengthiitemactual_lengthr"   r"   r'   verify_tuple_integrity@   s    z>SparkSession._create_dataframe.<locals>.verify_tuple_integrityr=   c                    sF   dd  t | d  fddt| D }d|}d| d}|S )	Nc                    s4   t | } fddt|D }dd| d }|S )Nc                    s   g | ]	}d |   qS )$r"   )r$   r%   start_param_idxr"   r'   r(   Y   s    zjSparkSession._create_dataframe.<locals>.construct_query.<locals>.construct_values_list.<locals>.<listcomp>(, ))rH   rangejoin)r/   rS   parameter_count
parametersr"   rR   r'   construct_values_listW   s   zVSparkSession._create_dataframe.<locals>.construct_query.<locals>.construct_values_listr   c                    s"   g | ]\}} |d |  qS )r   r"   )r$   rM   r%   r[   row_sizer"   r'   r(   ^   s   " zKSparkSession._create_dataframe.<locals>.construct_query.<locals>.<listcomp>rU   z'
                select * from (values z)
            )rH   rI   rX   )rK   values_listqueryr"   r\   r'   construct_queryV   s   
z7SparkSession._create_dataframe.<locals>.construct_queryc                 S   s    g }| D ]	}| t| q|S r4   )extendlist)rK   rZ   r/   r"   r"   r'   construct_parametersh   s   z<SparkSession._create_dataframe.<locals>.construct_parameters)params)pandasImportError
isinstancer   uuiduuid1r6   registersqlrb   rJ   )r:   r   re   
has_pandasunique_namerP   r`   r_   rc   rZ   relr"   r"   r'   _create_dataframe5   s(   

zSparkSession._create_dataframec                 C   s*   |  |}|r|j| }|r|j| }|S r4   )ro   _cast_typestoDF)r:   r   typesnamesdfr"   r"   r'   _createDataFrameFromPandass   s   


z'SparkSession._createDataFrameFromPandasNTr   samplingRatioverifySchemac                 C   s  |rt |st d }d }t|trtdddid|r)t|tr'| \}}n|}zdd l}d}W n ty<   d}Y nw |rLt||jrL| |||S d}	|s^|r^d}	t	dd	 |D g}|rjt|trjt
||}| |}
|	r~|
j}|d
}t|| }
|r|
j| }
|r|
j| }
|
S )NSHOULD_NOT_DATAFRAMEarg_namer   rE   r   TFc                 s   s    | ]}d V  qd S r4   r"   )r$   _r"   r"   r'   	<genexpr>   s    z/SparkSession.createDataFrame.<locals>.<genexpr>z1=0)NotImplementedErrorrg   r   r   r   extract_types_and_namesre   rf   ru   tupler1   ro   relationfilterrp   rq   )r:   r   r   rv   rw   rr   rs   re   rl   is_emptyrt   rn   r"   r"   r'   createDataFrame~   sN   







zSparkSession.createDataFramec                 C   s
   t | jS r4   )r2   r7   r:   r"   r"   r'   
newSession   s   
zSparkSession.newSessionr   startendstepnumPartitionsr   c                 C   s4   |rt |d u r|}d}t| jjd|||gd| S )Nr   rW   )rZ   )r   r   r6   table_function)r:   r   r   r   r   r"   r"   r'   rW      s   zSparkSession.rangesqlQuerykwargsc                 K   s   |rt | j|}t|| S r4   )r|   r6   rk   r   )r:   r   r   r   r"   r"   r'   rk      s   
zSparkSession.sqlc                 C   s   | j   d S r4   )r7   stopr   r"   r"   r'   r      s   zSparkSession.stop	tableNamec                 C   s   | j |}t|| S r4   )r6   tabler   )r:   r   r   r"   r"   r'   r      s   
zSparkSession.tablec                 C      | S r4   r"   r   r"   r"   r'   getActiveSession      zSparkSession.getActiveSessionr
   c                 C   s&   t | dsddlm} || | _| jS )N_catalogr   r	   )hasattr%duckdb.experimental.spark.sql.catalogr
   r   )r:   r
   r"   r"   r'   catalog   s   

zSparkSession.catalogc                 C      | j S r4   )r8   r   r"   r"   r'   conf      zSparkSession.confc                 C      t | S r4   r   r   r"   r"   r'   read      zSparkSession.readc                 C   r   r4   r   r   r"   r"   r'   
readStream   r   zSparkSession.readStreamc                 C   r   r4   )r7   r   r"   r"   r'   sparkContext   r   zSparkSession.sparkContextc                 C   s   t r4   r   r   r"   r"   r'   streams      zSparkSession.streamsc                 C   r   r4   r   r   r"   r"   r'   udf   r   zSparkSession.udfc                 C   s   dS )Nz1.0.0r"   r   r"   r"   r'   version   r   zSparkSession.versionc                	   @   s   e Zd Zdd Zdedd fddZdedd fddZd	edd fd
dZdddZ	dde	e de	e
 de	e dd fddZdddZdS )SparkSession.Builderc                 C   s   d S r4   r"   r   r"   r"   r'   r;     r   zSparkSession.Builder.__init__namer=   c                 C   r   r4   r"   r:   r   r"   r"   r'   master  r   zSparkSession.Builder.masterc                 C   r   r4   r"   r   r"   r"   r'   appName  r   zSparkSession.Builder.appNameurlc                 C   r   r4   r"   )r:   r   r"   r"   r'   remote  r   zSparkSession.Builder.remoter2   c                 C   s   t d}t|S )N__ignored__)r   r2   r9   r"   r"   r'   getOrCreate  s   z SparkSession.Builder.getOrCreateNkeyvaluer   c                 C   r   r4   r"   )r:   r   r   r   r"   r"   r'   config  s   zSparkSession.Builder.configc                 C   r   r4   r"   r   r"   r"   r'   enableHiveSupport  r   z&SparkSession.Builder.enableHiveSupportr=   r2   )NNN)r=   r   )__name__
__module____qualname__r;   rJ   r   r   r   r   r   r   r   r   r   r"   r"   r"   r'   Builder  s"    

r   )NNTr   )Nr   N)r=   N)r=   r
   )(r   r   r   r   r;   r   r   r   r   ro   ru   r   r   r   rJ   floatboolr   r   intrW   rk   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   builderr"   r"   r"   r'   r2   /   sn    >

?



r2   )'typingr   r   r   r   r   r   rh   r   r
   pandas.core.framer   r<   	exceptionr   rr   r   r   r   r   r   	dataframer   
readwriterr   r3   r   r   r   	streamingr   r+   errorsr   r   errors.error_classesr1   r2   __all__r"   r"   r"   r'   <module>   s*     
 
v