o
    Ci
b                     @   sh  d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lZd d
lZd dlmZ dd Zd dlmZ defddZdd ZG dd dZ	d7dededefddZd d
l Z!dd Z"d dl#m$Z$ dd  Z%d!d" Z&d#d$ Z'd%d& Z(d'd( Z)d)d* Z*d+d, Z+d-d. Z,d d/lm-Z- d0d1 Z.d2d3 Z/d8d5d6Z0d
S )9    )decode_valueflatten_documentdecrypt_json)create_engine)quote)Path)PlainTextAuthProvider)Cluster)MongoClient)models)textN)datetimec                 C   sd   | |kr| S | dkr|S |dkr| S | dkr|dkrdS | dkr&|dkr&dS | |hddhkr0dS dS )Nnullintegerfloatdater   string )existingnewr   r   (/var/www/Datamplify/Connections/utils.pypromote_type   s   r   )defaultdictreturnc                 C   s   | d u rdS t | trdS t | trdS t | trdS t | tr7zt| dd W dS  ty6   Y dS w t | t	r>d	S t | t
rEd
S dS )Nr   booleanr   r   Zz+00:00r   r   objectarray)
isinstanceboolintr   strr   fromisoformatreplace	Exceptiondictlist)valuer   r   r   detect_primitive_type,   s(   





r(   c                 C   s   t | tsd S |  D ]K\}}t |tr"| d| }t||| qt |trK| d| }|D ]}t |tr>t||| q0t|}||d| q0qt|}|||| qd S )N__r'   )r   r%   itemsdiscover_schema_with_typesr&   r(   
add_column)objtable	collectorkvchild_tableitemdtyper   r   r   r+   J   s"   



r+   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )TypedSchemaCollectorc                 C   s   t t| _d S N)r   r%   tables)selfr   r   r   __init__   s   zTypedSchemaCollector.__init__c                 C   sB   || j | vr|| j | |< d S t| j | | || j | |< d S r6   )r7   r   )r8   r.   columnr4   r   r   r   r,      s
   zTypedSchemaCollector.add_columnc                    sT   dd l  g }| j D ]\}} fdd| D }| dd||d q|S )Nr   c                    s$   g | ]\}}  d d||dqS )[^a-zA-Z0-9_]_colr4   )sub).0r>   r4   rer   r   
<listcomp>   s    z/TypedSchemaCollector.result.<locals>.<listcomp>r;   r<   r7   columns)rB   r7   r*   appendr?   )r8   r7   r.   r>   r   rA   r   result   s   
zTypedSchemaCollector.resultN)__name__
__module____qualname__r9   r,   rG   r   r   r   r   r5      s    r5      endpoint
root_tablesample_pagesc                 C   s8   t  }| |i }|sg S |D ]}t||| q| S r6   )r5   
fetch_pager+   rG   )clientrL   rM   rN   r/   recordsrecordr   r   r   discover_endpoint_schema   s   rS   c                 C   sF   | j dkr!tj| |   dd  s!tj| dd}|  S dS )zB
    Returns True if the column likely contains date strings.
    r   coerceerrorsF)r4   pd
to_numericnotnaallto_datetimeany)series	convertedr   r   r   is_date_column   s
   
r_   )parserc                 C   s  g }| j D ]}| | jdkrz| | jjddd r || W n ty2   td| d Y nw zNtj	j
| | rt| | | |   | |< | | d  }t|trl|dkrltj| | d	d
t| |< ntj| | | |   d	d
t| |< W q ty } zW Y d }~qd }~ww q|D ]-}z| | jjdddd| |< W q ty } ztd| d|  W Y d }~qd }~ww | S )Nr   ,FnaSkipping column .: Not a string column or contains invalid datarK           rT   rU    )regexError converting column z to numeric: )rE   r4   r!   containsr\   rF   AttributeErrorprintrW   apitypesis_numeric_dtyperX   rY   sumr   r   astyper    r$   r#   )dfnumeric_columnsr>   sum_moder:   r   r   r   convert_numeric_columns   s@   

(rv   c                 C   s   g }| j D ].}| | jdkr3z| | jjddd r || W q ty2   td| d Y qw q|D ]2}z| | jd	t
d d| |< W q6 tyh } ztd	| d
|  W Y d }~q6d }~ww | S )Nr   %Frb   rd   re   g      Y@   ri   z to percentage: )rE   r4   r!   rj   r\   rF   rk   rl   rstriprq   r   roundr$   )rr   percentage_columnsr>   r:   ru   r   r   r   convert_percentage_columns   s(   

(r|   c                    s    fdd j D }|D ]`}zLz6 | dd  |< tjj | r. | jd  |< tj | dd |<  | dd  |< W n t	yX } zW Y d }~W qd }~ww W q t	yl } zW Y d }~qd }~ww  S )Nc                    s   g | ]
}t  | r|qS r   )r_   r@   r>   rr   r   r   rC      s    z,convert_datetime_columns.<locals>.<listcomp>c                 S   s"   t | rt| trt| S | S r6   )rW   notnullr   r!   r`   parsexr   r   r   <lambda>     " z*convert_datetime_columns.<locals>.<lambda>rT   rU   c                 S   s"   t | rt | st | S d S r6   )rW   rY   NaT	Timestampr   r   r   r   r     r   )
rE   applyrW   rm   rn   is_datetime64_any_dtypedttz_localizer[   r$   )rr   potential_date_columnsr:   ru   r   r~   r   convert_datetime_columns   s*   r   c              
   C   s(  z|dddddddddd	}g }| j D ]d}t| | j}|dkr5| | d  }t|tr3|d	kr3d}nd
}| |   rZ|dt|	 v rMd| dn
d| d||  d q|dt|	 v rjd| dn
d| d||  d qd
|W S  ty } zt| W Y d }~d S d }~ww )NStringInt64Float64Bool
DateTime64)	r   r   int64int32UInt64float64r   zdatetime64[ns]z&datetime64[ns, tzoffset(None, -28800)]r   rK   rf   r   r   z "z5" Nullable(DateTime64) DEFAULT '1970-01-01 00:00:00' z" Nullable()z+" DateTime64 DEFAULT '1970-01-01 00:00:00' z"  z, )rE   r!   r4   rp   r   r   isnullr\   rF   lowerjoinr$   rl   )rr   type_mappingclickhouse_schemar:   r4   rt   ru   r   r   r   map_dtypes_to_clickhouse  s6   
::r   c                 C   sH   zt j| j\} }W | |fS    tt| }|j|j} }Y | |fS r6   )ospathsplitextnamer   r!   stemsuffix)	file_namefile_extensionserver_pathr   r   r   fetch_filename_extensionA  s   r   c              
   C   s\   z|   }|   dd d d}|W S  ty- } zdt| d}|W  Y d }~S d }~ww )N   statusenginecursor  r   message)connectshutdownr$   r!   )clustersessiondataru   r   r   r   cassandra_dbJ  s   r   c                 C   s   | d ks| dkrddd}|S |dkrdt |  }nF|dkrat| \}}|dks5|d	ks5|d
ks5|dkr6nddd}|S ztt jj}tj|t | }d| }W n   dt |  }Y d|d}|S )Nrg   i  zdatabase_path is mandatoryr   MICROSOFTACCESSzHaccess+pyodbc:///?Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=SQLITEz.dbz.sqlitez.sqlite3znot acceptable/invalid filez
sqlite:///r   )r   url)	r!   r   r   __file__resolveparentr   r   r   )r   	parameterr   r   r   r   BASE_DIRdb_file_pathr   r   r   server_path_function\  s2    r   c           
   
   C   s   z<| dks	| d kr|dks|d krt |t|}nd|  d| d| dt| d| 
}t |}|| }d||d}|W S  tyX }	 zdt|	 d	}|W  Y d }	~	S d }	~	ww )
Nrg   z
mongodb://:@/r   r   r   r   )r
   r    r$   r!   )
usernamepassworddatabasehostnameportrP   connection_stringdbr   ru   r   r   r   mongo_db  s$    &r   )
quote_plusc                 C   s  zt t|}W n   Y t|  dkr" d| ||||}	n dkr: d|  d| d| d| d| 
}	nn dkrR d	|  d| d| d| d
| 
}	nV dkrj d|  d| d| d
| d| 
}	n> dkr d|  d| d| d| d
| 
}	n& dkr d}
| r|d ks| r|dks| r|dkrd|
 d| d| d}nd|
 d| d| d|  dt| d}zt|}W n ty } zdddW  Y d }~S d }~ww |}| }d||d}|S  dkr t	||}|d  dkr|d! }	n|S n d"kr t	||}|d  dkr|d! }	n|S n d#kr4 d$|  d| d| d| d
| 
}	nt d%krD t
| ||||}|S  d&kr^ t| |d'}t|g||d(}t|}|S  d)krv d*|  d| d| d| d
| 
}n2 d+kr d*|  d| d| d| d
| 
}ndkrt|d,kr|}| }ndd-d}|S 	 z
t|	}| }W n ty } zdddW  Y d }~S d }~ww d||d}|S ).N
POSTGRESQLzpostgresql://{}:{}@{}:{}/{}ORACLEzoracle+oracledb://r   r   z/?service_name=MYSQLzmysql+mysqlconnector://r   	SNOWFLAKEzsnowflake://z?port=IBMDB2zibm_db_sa://MICROSOFTSQLSERVERzODBC Driver 18 for SQL Serverrg   zDRIVER={z	};SERVER=z
;DATABASE=z>;Trusted_Connection=yes;Encrypt=no;TrustServerCertificate=yes;z;UID=z;PWD=z';Encrypt=no;TrustServerCertificate=yes;r   zInvalid Credentialsr   r   r   r   r   r   r   SYBASEzsybase+pyodbc://MONGODB	CASSANDRA)r   r   )r   auth_providerzSAP HANAzhana+hdbcli://zSAP BWi  zInvalid port)r   r   r!   upperformatpyodbcr   r$   r   r   r   r   r	   r   r    r   )r   r   r   r   r   service_namer   r   password1234r   driverr   connru   r   r   r   
sq_msaccesmongor   r   	cassandrar   r   r   server_connection  s   


&
&
&
&
$(



$$$
r   c           	   	   C   s  |g}t jjj|d}t|dr|jr||jj t jjj| |d}t j	jj|j
jd}|j
 dkr\t jjj|jd}t|j|j|j|j|j|j|j|j}|j|d< |j|d< |S |j
 dkri }t jjj|jd}t|j|d< t|j|d	< |j|d< |j|d
< |S d)N)id
created_by)r   user_id__inr   schematypeintegrationstoken_metadatacredentialsintegration_idzNot Implemented)conn_modelsUserProfileobjectsgethasattrr   rF   r   ConnectionsDataSourcesr   r   DatabaseConnectionstable_idr   r   r   r   r   r   r   r   database_pathr   Integrationsr   r   r   )	r   user_idaccessible_user_idsuserConnection_detailsserver_typedatabase_detailsr   Integration_detailsr   r   r   generate_engine  s.   



r   publicc                 C   s>  |    dkr |td}n dkr |td}n dkr. |td| d}n dkrS z	|td	}W n tyR } z
t| W Y d }~nd }~ww  d
kr^ |d}n dkro |td| d}n dkr| |td}nz dkr |td}nmdkrg }| }|D ]W}|| }i }	|i }
dd |
D }|D ]}| D ]\}}t|j	|	|< q ddddddddddd
}g }|	 D ]\}}|
|d}|||d q|||d q|S 	 td|  g }| D ] }|d }|d  }|d!}d"d |D }||d}|| q|S )#Nmysqla  SELECT
                    TABLE_NAME,
                    GROUP_CONCAT(CONCAT(COLUMN_NAME, ':', DATA_TYPE) ORDER BY ORDINAL_POSITION) AS columns
                FROM
                    INFORMATION_SCHEMA.COLUMNS
                WHERE
                    TABLE_SCHEMA = DATABASE()  -- Or specify a specific database name
                GROUP BY
                    TABLE_NAME
                ORDER BY
                    TABLE_NAME;oracleav  SELECT
                    TABLE_NAME,
                    LISTAGG(COLUMN_NAME || ':' || DATA_TYPE, ', ') WITHIN GROUP (ORDER BY COLUMN_ID) AS columns
                FROM
                    ALL_TAB_COLUMNS
                WHERE
                    OWNER = USER
                GROUP BY
                    TABLE_NAME
                ORDER BY
                    TABLE_NAME	snowflakea  SELECT
                    TABLE_NAME,
                    LISTAGG(COLUMN_NAME || ':' || DATA_TYPE, ', ') WITHIN GROUP (ORDER BY ORDINAL_POSITION) AS columns
                FROM
                    INFORMATION_SCHEMA.COLUMNS
                WHERE
                    TABLE_SCHEMA =  'z' -- Or specify a schema/database
                    AND TABLE_CATALOG = CURRENT_DATABASE()
                    GROUP BY
                    TABLE_NAME
                ORDER BY
                    TABLE_NAME;sqlitea0  SELECT
                        m.name AS table_name,
                        GROUP_CONCAT(p.name || ':' || p.type, ', ') AS columns
                    FROM sqlite_master AS m
                    JOIN pragma_table_info(m.name) AS p
                    GROUP BY m.name
                    ORDER BY m.name;microsoftsqlservera  SELECT
                t.name AS table_name,
                STRING_AGG(c.name + ':' + TYPE_NAME(c.user_type_id), ', ') WITHIN GROUP (ORDER BY c.column_id) AS columns
            FROM sys.tables AS t
            INNER JOIN sys.columns AS c ON t.object_id = c.object_id
            INNER JOIN sys.schemas AS s ON t.schema_id = s.schema_id
            WHERE s.name = 'dbo'  -- Or specify the schema name
            GROUP BY t.name
            ORDER BY t.name;
postgresqlzSELECT 
                    table_name, 
                    STRING_AGG(column_name || ':' || data_type, ',' ORDER BY ordinal_position) AS columns 
                FROM information_schema.columns 
                WHERE table_schema = 'zL' 
                GROUP BY table_name 
                ORDER BY table_name;mariadbdb2a0  SELECT
                    TABNAME AS table_name,
                    LISTAGG(COLNAME || ':' || TYPENAME, ', ') WITHIN GROUP (ORDER BY COLNO) AS columns
                FROM SYSCAT.COLUMNS
                WHERE TABSCHEMA = CURRENT SCHEMA
                GROUP BY TABNAME
                ORDER BY TABNAME;mongodbc                 S   s   g | ]}t |qS r   )r   )r@   docr   r   r   rC     s    z%get_table_details.<locals>.<listcomp>r   numericzcharacter varying	timestampr   json)
r    r   r!   r   ObjectIdr   r&   r%   NoneTyper  r=   rD   zUnsupported database type: r   rK   ra   c                 S   s2   g | ]}| d d  | d d  dqS )r   r   rK   r=   )splitstripr}   r   r   r   rC     s   2 )r   executer   r$   rl   list_collection_namesfindr*   r   rH   r   rF   
ValueErrorfetchallr  )database_typer   r   cursor_dataru   r7   collections	coll_name
collectionfieldsr  
dict_itemsrowkeyr'   sql_type_mapcolumns_listr>   py_typesql_type
table_namecolumn_stringcolumn_listrE   
table_datar   r   r   get_table_details  s   













r%  )rK   )r   )1Service.utilsr   r   r   
sqlalchemyr   urllib.parser   pathlibr   cassandra.authr   cassandra.clusterr	   pymongor
   r   r   r   r   r   r   r   r   r  r   r!   r(   r+   r5   r    rS   pandasrW   r_   dateutilr`   rv   r|   r   r   r   r   r   r   r   r   r   r%  r   r   r   r   <module>   sN    8
!#$	&Y$