o
    QDiE                     @  s   d dl mZ d dlZd dlZd dlmZmZ d dlmZmZm	Z	m
Z
mZ d dlZddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ erdddlmZ ddlmZ ejdddG dd dZ dS )    )annotationsN)copydeepcopy)TYPE_CHECKINGAnyDictListOptional   )helperssettings)Dialect)FrictionlessException)AnyField)Metadata)platform)FieldSchema)types)ResourceTF)kw_onlyreprc                   @  s~  e Zd ZU dZejZded< 	 ejZ	ded< 	 dZ
ded< 	 ejZded	< 	 dZd
ed< 	 dZded< 	 ejZded< 	 ejZded< 	 ejejjdZded< 	 ejejjdZded< 	 ejejjdZded< 	 dZded< 	 dZded< 	 edddPdd ZdQd$d%Z dd&dRd+d,Z!dd-dSd3d4Z"ddej#d5dTd?d@Z$edUdEdFZ%edVdHdIZ&dWdJdKZ'edXdNdOZ(dS )YDetectorzDetector representation.

    This main purpose of this class is to set the parameters to define
    how different aspects of metadata are detected.

    intbuffer_sizesample_sizeNz!Optional[types.IEncodingFunction]encoding_functionfloatencoding_confidenceOptional[str]
field_typeOptional[List[str]]field_namesfield_confidenceboolfield_float_numbers)factory	List[str]field_missing_valuesfield_true_valuesfield_false_valuesFschema_synczOptional[Dict[str, Any]]schema_patch)formatsourcer   r-   returnc                C  s   t | } t| trFtj D ]\}}| t|d r |  S q|p)t 	| d }|dv rFztj
d }tj| |d} W n	 tyE   Y nw t| trdtj D ]\}}t|d |  rc|  S qPdS dS )z4Return an descriptor type as 'resource' or 'package'names   )jsonyaml
   )sizepropsN)r   normalize_source
isinstancestrr   METADATA_TRAITSitemsendswithtupleparse_scheme_and_formatDEFAULT_BUFFER_SIZEr   metadata_retrieve	Exceptiondictsetintersectionkeys)r.   r-   typeitemr5    rH   W/var/www/Datamplify/venv/lib/python3.10/site-packages/frictionless/detector/detector.pydetect_metadata_type   s*   



zDetector.detect_metadata_typeresourcer   Nonec           	      C  s  d}d}d}d}|j rrg }|j g|jpg  D ]}tj tj |d }|| qtj |}tj|dd}|p:d}t	|j \}}|t
jv rr|}|j dt| d  }|jratj ||j}t	|\}}|rrtj |d }|jpv||_|jp|||_|jp||_|jp||_dS )zDetects path detailsmemoryNr   z[^-a-z0-9._/])regex_patternnamer1   )path
extrapathsossplitextbasenameappendcommonprefixr   slugifyr>   r   COMPRESSION_FORMATSlen	innerpathjoinrO   schemer-   compression)	selfrK   rO   r\   r-   r]   r0   partrP   rH   rH   rI   detect_resource   s2   
zDetector.detect_resource)encodingbuffertypes.IBufferra   r9   c                C  s   | j r|  |S |s=tj }| D ]}|| q|  |jd p&tj	}|jd p-d}|| j
k r6tj	}|dkr=tj	}t|j}|dkrQ|tjrOd}|S |dkr_|tjr]d}|S |d	krk|tjrkd}|S )
zDetect encoding from buffer

        Parameters:
            buffer (byte): byte buffer

        Returns:
            str: encoding
        ra   
confidencer   asciizutf-8z	utf-8-sigz	utf-16-bezutf-16z	utf-16-le)r   r   chardetUniversalDetector
splitlinesfeedcloseresultr   DEFAULT_ENCODINGr   codecslookuprO   
startswithBOM_UTF8BOM_UTF16_BEBOM_UTF16_LE)r^   rb   ra   detectorlinerd   rH   rH   rI   detect_encoding   s4   


	zDetector.detect_encoding)dialectsampletypes.ISamplerv   Optional[Dialect]r   c                C  s   |pt  }| }dd |D }|rn|dsn|dsntt|t| }tt|d d}tt|| || d }t	j
 }t|ddD ]\}	}
|rS||	|
sSqGt|
|v r^|	g} nqG|sfd|_|S |t	j
krn||_|S )	zDetect dialect from sample

        Parameters:
            sample (any[][]): data sample
            dialect? (Dialect): file dialect

        Returns:
            Dialect: dialect
        c                 S  s   g | ]}t |qS rH   )rY   ).0cellsrH   rH   rI   
<listcomp>      z+Detector.detect_dialect.<locals>.<listcomp>headerheader_rowsg?r1   )startF)r   create_comment_filterhas_definedroundsumrY   maxlistranger   DEFAULT_HEADER_ROWSr   	enumerater~   r   )r^   rw   rv   comment_filterwidthswidthdriftmatchr   
row_numberr{   rH   rH   rI   detect_dialect   s6   




zDetector.detect_dialect)labelsschemafield_candidatesfragmenttypes.IFragmentr   r   Optional[Schema]r   List[Dict[str, Any]]optionsr   c          "      K  s^  |st g d}| jtjkr| j|_t| jp|pg }ttdd |}|s:|s*|S dd t	dt
|d d D }t|D ]\}}|pJd|d  ||< q>t
|t
t|krg }	| }t|D ]\}}|	|d }
|
dkrwd	||
f n|||< |	| qb| js|s| jptj}g |_|D ]}t||d
}|| q|S g }g }|D ]9}| }d|d< t|}|jdkr| jrd|_n|jdkr| jtjkr| j|_| jtjkr| j|_|| qt|D ]\}}|g  |D ]}|| |dd qqdgt
| }t
|gt
| }t
|| jd  }|D ]}t|D ]z\}}|| dur.q!t
||kr9|| nd}|| jv }|rK||  d8  < || D ]J}|d |k rZqO|ss|d |\}}|d  |sodnd7  < || dkr|d || | j kr|d   }||_!||_"|||<  nqOq!qt|D ]\}}|| du rt#||d||< q||_| j$r|r|d }|sdd |D }t
|t
t|krd}t%|| &|j|}| '||| | (|||| | j)r-t*| j)}|+di }|, }|-| |.dg D ]}|.d} |.| i }!|-|! qt |}|S )zDetect schema from fragment

        Parameters:
            fragment (any[][]): data fragment
            labels? (str[]): data labels
            schema? (Schema): data schema

        Returns:
            Schema: schema
        )fieldsc                 S  s   |  dd S )N
 )replacestrip)cellrH   rH   rI   <lambda>D  s    z(Detector.detect_schema.<locals>.<lambda>c                 S  s   g | ]}d | qS )fieldrH   )rz   numberrH   rH   rI   r|   H  s    z*Detector.detect_schema.<locals>.<listcomp>r1   r   r   z%s%srO   rF   sharedrO   r   Tboolean)r   scoreNr   )rO   r   header_casec                 S     g | ]}|  qS rH   lower)rz   labelrH   rH   rI   r|     r}   z2"schema_sync" requires unique labels in the headerr   )/r   r(   r   DEFAULT_MISSING_VALUESmissing_valuesr   r"   r   mapr   rY   r   rC   countrU   r    DEFAULT_FIELD_TYPEr   r   from_descriptor	add_fieldrF   r%   float_numberr)   DEFAULT_TRUE_VALUEStrue_valuesr*   DEFAULT_FALSE_VALUESfalse_valuesr#   	read_cellto_copyrO   r   r   r+   r   mapped_schema_fields_names$rearrange_schema_fields_given_labels,add_missing_required_labels_to_schema_fieldsr,   r   popto_descriptorupdateget)"r^   r   r   r   r   r   r0   indexrO   
seen_namesr   rF   r   runnersrunner_fields	candidate
descriptorr   	max_score	thresholdr{   r.   is_field_missing_valuerunner_notescase_sensitivenotemapped_fieldspatchpatch_fieldsfield_descriptor
field_namefield_patchrH   rH   rI   detect_schema&  s   
 








zDetector.detect_schemar   List[Field]r   Dict[str, Field]c                 C  s    |r	dd | D S dd | D S )z:Create a dictionnary to map field names with schema fieldsc                 S  s   i | ]}|j |qS rH   )rO   rz   r   rH   rH   rI   
<dictcomp>  r}   z7Detector.mapped_schema_fields_names.<locals>.<dictcomp>c                 S  s   i | ]}|j  |qS rH   )rO   r   r   rH   rH   rI   r     s    rH   )r   r   rH   rH   rI   r     s   z#Detector.mapped_schema_fields_namesfields_mappingc                 C  s<   |   |D ]}t|dd}| ||}|| qdS )zeRearrange fields according to the order of labels. All fields
        missing from labels are droppedanyr   N)clear_fieldsr   r   r   r   )r   r   r   rO   default_fieldr   rH   rH   rI   r     s   z-Detector.rearrange_schema_fields_given_labelsc                 C  s6   |  D ]\}}| |||r||vr|| qdS )zvThis method aims to add missing required labels and
        primary key field not in labels to schema fields.
        N)r;   field_is_requiredr   )r^   r   r   r   r   rO   r   rH   rH   rI   r     s   

z5Detector.add_missing_required_labels_to_schema_fieldsr   r   c                 C  s:   |r| j p
| j|jv S dd |jD }| j p| j |v S )Nc                 S  r   rH   r   )rz   pkrH   rH   rI   r|     r}   z.Detector.field_is_required.<locals>.<listcomp>)requiredrO   primary_keyr   )r   r   r   lower_primary_keyrH   rH   rI   r     s   zDetector.field_is_required)r.   r   r-   r   r/   r   )rK   r   r/   rL   )rb   rc   ra   r   r/   r9   )rw   rx   rv   ry   r/   r   )r   r   r   r!   r   r   r   r   r   r   r/   r   )r   r   r   r$   r/   r   )r   r   r   r   r   r'   )r   r   r   r   r   r'   r   r$   )r   r   r   r   r   r$   r/   r$   ))__name__
__module____qualname____doc__r   r?   r   __annotations__DEFAULT_SAMPLE_SIZEr   r   DEFAULT_ENCODING_CONFIDENCEr   r    r"   DEFAULT_FIELD_CONFIDENCEr#   DEFAULT_FLOAT_NUMBERSr%   attrsr   r   r   r(   r   r)   r   r*   r+   r,   staticmethodrJ   r`   ru   r   DEFAULT_FIELD_CANDIDATESr   r   r   r   r   rH   rH   rH   rI   r      sl   
 
#39 	
r   )!
__future__r   rm   rR   r   r   typingr   r   r   r   r	   r    r   r   rv   r   	exceptionr   r   r   metadatar   r   r   r   r   r   rK   r   definer   rH   rH   rH   rI   <module>   s$    