o
    8Di                     @  s  d dl mZ d dlZd dlZd dlmZ d dlmZm	Z	 d dl
mZ d dlmZmZmZ ejr5d dlmZ zd dlmZmZmZmZ ejd	d
d
kZW n eyW   dZY nw G dd deZ e!e Z"dd e#e"D Z$G dd dZ%G dd de&Z'G dd de'dZdS )    )annotationsN)auto)SqlglotError
TokenError)AutoName)
TrieResultin_trienew_trie)DialectType)	TokenizerTokenizerDialectSettingsTokenizerSettingsTokenTypeSettingsSQLGLOTRS_TOKENIZER1Fc                   @  s\
  e Zd Ze Ze Ze Ze Ze Ze Z	e Z
e Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Z e Z!e Z"e Z#e Z$e Z%e Z&e Z'e Z(e Z)e Z*e Z+e Z,e Z-e Z.e Z/e Z0e Z1e Z2e Z3e Z4e Z5e Z6e Z7e Z8e Z9e Z:e Z;e Z<e Z=e Z>e Z?e Z@e ZAe ZBe ZCe ZDe ZEe ZFe ZGe ZHe ZIe ZJe ZKe ZLe ZMe ZNe ZOe ZPe ZQe ZRe ZSe ZTe ZUe ZVe ZWe ZXe ZYe ZZe Z[e Z\e Z]e Z^e Z_e Z`e Zae Zbe Zce Zde Zee Zfe Zge Zhe Zie Zje Zke Zle Zme Zne Zoe Zpe Zqe Zre Zse Zte Zue Zve Zwe Zxe Zye Zze Z{e Z|e Z}e Z~e Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Z e Ze Ze Ze Ze Ze Ze Ze Ze Z	e Z
e Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Z e Z!e Z"e Z#e Z$e Z%e Z&e Z'e Z(e Z)e Z*e Z+e Z,e Z-e Z.e Z/e Z0e Z1e Z2e Z3e Z4e Z5e Z6e Z7e Z8e Z9e Z:e Z;e Z<e Z=e Z>e Z?e Z@e ZAe ZBe ZCe ZDe ZEe ZFe ZGe ZHe ZIe ZJe ZKe ZLe ZMe ZNe ZOe ZPe ZQe ZRe ZSe ZTe ZUe ZVe ZWe ZXe ZYe ZZe Z[e Z\e Z]e Z^e Z_e Z`e Zae Zbe Zce Zde Zee Zfe Zge Zhe Zie Zje Zke Zle Zme Zne Zoe Zpe Zqe Zre Zse Zte Zue Zve Zwe Zxe Zye Zze Z{e Z|e Z}e Z~e Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze ZdS )	TokenTypeN(  __name__
__module____qualname__r   L_PARENR_PAREN	L_BRACKET	R_BRACKETL_BRACER_BRACECOMMADOTDASHPLUSCOLONDOTCOLONDCOLONDQMARK	SEMICOLONSTAR	BACKSLASHSLASHLTLTEGTGTENOTEQNEQNULLSAFE_EQCOLON_EQANDORAMPDPIPEPIPE_GTPIPE
PIPE_SLASHDPIPE_SLASHCARETCARET_ATTILDAARROWDARROWFARROWHASH
HASH_ARROWDHASH_ARROWLR_ARROWDATLT_ATAT_GTDOLLAR	PARAMETERSESSION_PARAMETERDAMPXORDSTAR	URI_STARTBLOCK_START	BLOCK_ENDSPACEBREAKSTRINGNUMBER
IDENTIFIERDATABASECOLUMN
COLUMN_DEFSCHEMATABLE	WAREHOUSESTAGE	STREAMLITVAR
BIT_STRING
HEX_STRINGBYTE_STRINGNATIONAL_STRING
RAW_STRINGHEREDOC_STRINGUNICODE_STRINGBITBOOLEANTINYINTUTINYINTSMALLINT	USMALLINT	MEDIUMINT
UMEDIUMINTINTUINTBIGINTUBIGINTINT128UINT128INT256UINT256FLOATDOUBLEUDOUBLEDECIMAL	DECIMAL32	DECIMAL64
DECIMAL128
DECIMAL256UDECIMAL
BIGDECIMALCHARNCHARVARCHARNVARCHARBPCHARTEXT
MEDIUMTEXTLONGTEXTBLOB
MEDIUMBLOBLONGBLOBTINYBLOBTINYTEXTNAMEBINARY	VARBINARYJSONJSONBTIMETIMETZ	TIMESTAMPTIMESTAMPTZTIMESTAMPLTZTIMESTAMPNTZTIMESTAMP_STIMESTAMP_MSTIMESTAMP_NSDATETIME	DATETIME2
DATETIME64SMALLDATETIMEDATEDATE32	INT4RANGEINT4MULTIRANGE	INT8RANGEINT8MULTIRANGENUMRANGENUMMULTIRANGETSRANGETSMULTIRANGE	TSTZRANGETSTZMULTIRANGE	DATERANGEDATEMULTIRANGEUUID	GEOGRAPHYNULLABLEGEOMETRYPOINTRING
LINESTRINGMULTILINESTRINGPOLYGONMULTIPOLYGON	HLLSKETCHHSTORESUPERSERIALSMALLSERIAL	BIGSERIALXMLYEARUSERDEFINEDMONEY
SMALLMONEY
ROWVERSIONIMAGEVARIANTOBJECTINET	IPADDRESSIPPREFIXIPV4IPV6ENUMENUM8ENUM16FIXEDSTRINGLOWCARDINALITYNESTEDAGGREGATEFUNCTIONSIMPLEAGGREGATEFUNCTIONTDIGESTUNKNOWNVECTORDYNAMICVOIDALIASALTERALWAYSALLANTIANYAPPLYARRAYASCASOFATTACHAUTO_INCREMENTBEGINBETWEENBULK_COLLECT_INTOCACHECASECHARACTER_SET
CLUSTER_BYCOLLATECOMMANDCOMMENTCOMMIT
CONNECT_BY
CONSTRAINTCOPYCREATECROSSCUBECURRENT_DATECURRENT_DATETIMECURRENT_SCHEMACURRENT_TIMECURRENT_TIMESTAMPCURRENT_USERDECLAREDEFAULTDELETEDESCDESCRIBEDETACH
DICTIONARYDISTINCTDISTRIBUTE_BYDIVDROPELSEENDESCAPEEXCEPTEXECUTEEXISTSFALSEFETCHFILE_FORMATFILTERFINALFIRSTFORFORCEFOREIGN_KEYFORMATFROMFULLFUNCTIONGETGLOBGLOBALGRANTGROUP_BYGROUPING_SETSHAVINGHINTIGNOREILIKE	ILIKE_ANYININDEXINNERINSERT	INTERSECTINTERVALINTO
INTRODUCERIRLIKEISISNULLJOINJOIN_MARKERKEEPKEYKILLLANGUAGELATERALLEFTLIKELIKE_ANYLIMITLISTLOADLOCKMAPMATCH_CONDITIONMATCH_RECOGNIZE	MEMBER_OFMERGEMODMODELNATURALNEXTNOTHINGNOTNULLNULLOBJECT_IDENTIFIEROFFSETONONLYOPERATORORDER_BYORDER_SIBLINGS_BYORDERED
ORDINALITYOUTEROVEROVERLAPS	OVERWRITE	PARTITIONPARTITION_BYPERCENTPIVOTPLACEHOLDER
POSITIONALPRAGMAPREWHEREPRIMARY_KEY	PROCEDURE
PROPERTIESPSEUDO_TYPEPUTQUALIFYQUOTERANGE	RECURSIVEREFRESHRENAMEREPLACE	RETURNING
REFERENCESRIGHTRLIKEROLLBACKROLLUPROWROWSSELECTSEMI	SEPARATORSEQUENCESERDE_PROPERTIESSETSETTINGSSHOW
SIMILAR_TOSOMESORT_BY
START_WITHSTORAGE_INTEGRATIONSTRAIGHT_JOINSTRUCT	SUMMARIZETABLE_SAMPLETAG	TEMPORARYTOPTHENTRUETRUNCATEUNCACHEUNIONUNNESTUNPIVOTUPDATEUSEUSINGVALUESVIEWVOLATILEWHENWHEREWINDOWWITHUNIQUEVERSION_SNAPSHOTTIMESTAMP_SNAPSHOTOPTIONSINKSOURCEANALYZE	NAMESPACEEXPORTHIVE_TOKEN_STREAM r  r  G/var/www/Datamplify/venv/lib/python3.10/site-packages/sqlglot/tokens.pyr      s   r   c                 C  s   i | ]\}}||qS r  r  ).0i
token_typer  r  r  
<dictcomp>  s    r  c                   @  sh   e Zd ZdZed"ddZed#d	d
Zed$ddZed%ddZ					d&d'ddZ	d(d d!Z
dS ))Tokenr  textlinecolstartendcommentsnumberintreturnc                 C  s   | t jt|S )z1Returns a NUMBER token with `number` as its text.)r   rQ   str)clsr  r  r  r  r    s   zToken.numberstringr  c                 C     | t j|S )z1Returns a STRING token with `string` as its text.)r   rP   )r  r  r  r  r  r       zToken.string
identifierc                 C  r  )z:Returns an IDENTIFIER token with `identifier` as its text.)r   rR   )r  r  r  r  r  r    r  zToken.identifiervarc                 C  r  )z,Returns an VAR token with `var` as its text.)r   r[   )r  r  r  r  r  r    r  z	Token.var   r   Nr  r   r  r  r  r  r  r  t.Optional[t.List[str]]Nonec                 C  s@   || _ || _|| _|| _|| _|| _|du rg | _dS || _dS )a  Token initializer.

        Args:
            token_type: The TokenType Enum.
            text: The text of the token.
            line: The line that the token ends on.
            col: The column that the token ends on.
            start: The start index of the token.
            end: The ending index of the token.
            comments: The comments to attach to the token.
        Nr  )selfr  r  r  r  r  r  r  r  r  r  __init__  s   zToken.__init__c                   s&   d  fdd jD }d| dS )Nz, c                 3  s$    | ]}| d t  | V  qdS )z: N)getattr)r  kr  r  r  	<genexpr>  s   " z!Token.__repr__.<locals>.<genexpr>z<Token >)join	__slots__)r  
attributesr  r  r  __repr__  s   zToken.__repr__)r  r  r  r  )r  r  r  r  )r  r  r  r  )r  r  r  r  )r  r  r   r   N)r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )r   r   r   r  classmethodr  r  r  r  r  r  r  r  r  r  r    s"    r  c                      s   e Zd Z fddZ  ZS )
_Tokenizerc                   s  t  | |||d1dd d2 fd
d} j_ j_i dd j D |tjj	|tj
j|tjj|tjj|tjj|tjj_tj_tj_i tdd jD ddi_jjv rdjj< tfddg jjjjR D _ t!ret"d3i ddd j# D ddd j$ D ddd j D dj%djdjdjdjdd d j D d!t&j	d"t&jd#jd$j'd%d&d' j(D d(d)d' j)D d*j*d+j+d,j,d-jd.d/d' j-D }t.t/tj t/tj0 t/tj1 t/tj t/tj t/tj t/tj2 t/tj3 t/tj4 t/tj5 t/tj6 t/tj7 t/j8 t/tj9 d0}t:||_;S d _;S )4Narrt.List[str | t.Tuple[str, str]]r  t.Dict[str, str]c                 S  s   t dd | D S )Nc                 s  s2    | ]}t |tr||fn|d  |d fV  qdS )r   r  N
isinstancer  )r  itemr  r  r  r    s    "
z>_Tokenizer.__new__.<locals>._convert_quotes.<locals>.<genexpr>)dict)r  r  r  r  _convert_quotes  s   z+_Tokenizer.__new__.<locals>._convert_quotesr  r   $t.Dict[str, t.Tuple[str, TokenType]]c                   s    fdd|  D S )Nc                   s   i | ]	\}}|| fqS r  r  r  r  vr  r  r  r        zA_Tokenizer.__new__.<locals>._quotes_to_format.<locals>.<dictcomp>)items)r  r  )r  r  r  _quotes_to_format  s   z-_Tokenizer.__new__.<locals>._quotes_to_formatc                 S  s*   i | ]\}}d D ]
}|| |t jfqqS ))nN)r   r_   )r  sepr  r  r  r    s    z&_Tokenizer.__new__.<locals>.<dictcomp>c                 s  s2    | ]}t |tr|d fn|d |d fV  qd S )Nr   r  r  )r  commentr  r  r  r    s
     
z%_Tokenizer.__new__.<locals>.<genexpr>z{#z#}*/c                 3  s8    | ] d  v st  fddjD r  V  qdS ) c                 3  s    | ]}| v V  qd S Nr  )r  singlekeyr  r  r  "  s    z/_Tokenizer.__new__.<locals>.<genexpr>.<genexpr>N)anySINGLE_TOKENSupper)r  )klassr  r  r    s     white_spacec                 S     i | ]	\}}|t | qS r  _TOKEN_TYPE_TO_INDEXr  r  r  r  r  '  r  single_tokensc                 S  r  r  r  r  r  r  r  r  (  r  keywordsc                 S  r  r  r  r  r  r  r  r  )  r  numeric_literalsidentifiersidentifier_escapesstring_escapesquotesformat_stringsc                 S  s"   i | ]\}\}}||t | fqS r  r  )r  r  v1v2r  r  r  r  /  s    
has_bit_stringshas_hex_stringsr  var_single_tokenscommandsc                 S     h | ]}t | qS r  r  r  r  r  r  r  	<setcomp>7  s    z%_Tokenizer.__new__.<locals>.<setcomp>command_prefix_tokensc                 S  r  r  r  r  r  r  r  r  8      heredoc_tag_is_identifier%string_escapes_allowed_in_raw_stringsnested_comments
hint_starttokens_preceding_hintc                 S  r  r  r  r  r  r  r  r  ?  r  )
bit_stringbreak_dcolonheredoc_string
raw_string
hex_stringr  r  	parameter	semicolonr  r  heredoc_string_alternativehint)r  r  r  r  )r  r   r  r  r  r  r  )<super__new__QUOTES_QUOTESIDENTIFIERS_IDENTIFIERSr  r   r\   BIT_STRINGSr^   BYTE_STRINGSr]   HEX_STRINGSr`   RAW_STRINGSra   HEREDOC_STRINGSrb   UNICODE_STRINGS_FORMAT_STRINGSsetSTRING_ESCAPES_STRING_ESCAPESIDENTIFIER_ESCAPES_IDENTIFIER_ESCAPESr  COMMENTS	_COMMENTS
HINT_STARTKEYWORDSr	   _KEYWORD_TRIEUSE_RS_TOKENIZERRsTokenizerSettingsWHITE_SPACEr  NUMERIC_LITERALSboolVAR_SINGLE_TOKENSCOMMANDSCOMMAND_PREFIX_TOKENSHEREDOC_TAG_IS_IDENTIFIER%STRING_ESCAPES_ALLOWED_IN_RAW_STRINGSNESTED_COMMENTSTOKENS_PRECEDING_HINTRsTokenTypeSettingsr  rO   r!   rR   rQ   rF   r#   rP   r[   HEREDOC_STRING_ALTERNATIVEr  RsTokenizer_RS_TOKENIZER)r  clsnamebasesattrsr  settingstoken_types	__class__)r  r  r  r    s   
	

	

z_Tokenizer.__new__)r   r   r   r  __classcell__r  r  r8  r  r    s    r  c                   @  s  e Zd ZU i dejdejdejdejdejdej	dej
dejd	ejd
ejdejdejdejdejdejdejdejejejejejejejejejejejejejejdZ g Z!de"d< g Z#de"d< g Z$de"d< g Z%de"d< g Z&de"d< g Z'de"d< dgZ(de"d< dgZ)de"d< dgZ*e+ Z,de"d < g Z-d!e"d"< d#Z.ej/Z0d$Z1d$Z2d%Z3ej4ej5ej6ej7hZ8i Z9d&e"d'< i Z:d(e"d)< i Z;d&e"d*< e+ Z<de"d+< i Z=d&e"d,< e+ Z>de"d-< i Z?d.e"d/< d0Z@d1e"d2< i d3d4 d5D d6d4 d5D d7d4 d8D d9d4 d8D i e3ejAd:ejd;ejBd<ejCd=ejDd>ejEd?ejFd@ejGdAejGdBejHdCejIdDejJdEejKdFejLdGejMdHejNdIejOi dJejPdKejQdLejRdMejSdNejTdOejUdPejVdQejWdRejXdSejYdTejZdUej[dVej\dWej]dXej^dYej^dZej_i d[ej`d\ejad]ejbd^ejcd_ejdd`ejedaejfdbejgdcejhddejideejjdfejkdgejldhejmdiejndjejodkejpi dlejqdmejrdnejsdoejtdpejudqej7drejvdsejwdtejxduejydvejzdwej{dxej|dyej}dzej~d{ejd|eji d}ejd~ejdejdejdejdejdejdejdejdejdejdejdejdejdejRdejdeji dejdejTdejdejdejdejdej5dejdejdejdejdejdejdejdejdejdeji dejSdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdeji dejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdejdeji dejdejdejdejdejdejÓdejdejēdejœdejƓdejǓdejȓdej4dejɓdejʓdej˓dej̓i dej͓dejΓdejϓdejГdejѓdejғdejӓdejԓdejԓdejՓdej֓dejדdejؓdejdejٓdejړdej6i dejۓdejܓdejݓdejޓdejߓdejdejdejdejdejdejdejdejdejdejdejdeji dejdejdejdejdejdejdejdejdejdejdejdejd ejdejdejdejdeji dejdejdejdejd	ejd
ejdejdejdejdejdejdejdejdejdejdejdeji dejdejdejdejdejdejdejdejdejdej d ej d!ejd"ejd#ejd$ejd%ejd&eji d'ejd(ejd)ejd*ejd+ejd,ejd-ejd.ejd/ejd0ejd1ej	d2ej
d3ejd4ejd5ejd6ej
d7ej
i d8ejd9ejd:ejd;ejd<ejd=ejd>ejd?ejd@ejdAejdBejdCejdDejdEejdFejdGejdHeji dIejdJejdKejdLej dMej!dNej"dOej#dPej$dQej%dRej&dSej'dTej(dUej)dVej*dWej)dXej+dYej)ej)ej)ej,ej-ej.dZZ/d[e"d\< ej0ej0ej1ej1d]Z2d^e"d_< ej)ejejejejhZ3ejej_hZ4i Z5d&e"d`< dadbgZ6dcZ7	0	0dddldmZ8ddndoZ9ddsdtZ:dddwdxZ;dd{d|Z<ddddZ=e>dddZ?ddddZ@dddZAdddZBdddZCdddZDdddZEdddZFdddZGdddZHdddZI	0	#	$ddddZJdddZKd0S (  r   ()[]{}&^:,.-=r  <%!)|+;/\*~?@#'`"r  r  r  r  r  r  r  rW  r  rU  zt.List[t.Tuple[str, str] | str]r  z
t.Set[str]r(  zt.List[str]r  FTz/*+r  r  r  r  r  r  r  r  zt.Dictr"  Nzt.Optional[t.Any]r2  c                 C     i | ]	}d | t jqS )z{%r   rL   r  postfixr  r  r  r    r  zTokenizer.<dictcomp>) rL  rF  c                 C     i | ]	}| d t jqS )z%}r   rM   r  prefixr  r  r  r    r  c                 C  rX  )z{{rY  rZ  r  r  r  r    r  )rL  rF  c                 C  r]  )z}}r^  r_  r  r  r  r    r  z==::z||z|>z>=z<=z<>z!=z:=z<=>z->z->>z=>z#>z#>>z<->z&&z??z~~~z~~z~~*z~*r   r   r0   r   r   r   ASr   AUTOINCREMENTr   r   r   r   r  r   zCHARACTER SETz
CLUSTER BYr   rT   r   z
CONNECT BYr   r   r   r   r   r   r   r   r   r   rS   r   r   r   r   r   zDISTRIBUTE BYr  r  r  r  r   r  r  r  r  r	  r
  r  r  r  r  r  zFOREIGN KEYr  r  r   r   r  zGROUP BYzGROUPING SETSr  r  r!  r"  r   r#  r$  r&  r%  r'  r*  r+  r,  r.  r0  r2  r3  r4  r6  r8  r9  r>  r  rA  rB  r+   rD  rE  r   rG  rH  r1   rI   zORDER BYrN  rO  rP  rQ  rR  rS  zPARTITION BYzPARTITIONED BYPARTITIONED_BYrU  rV  rY  zPRIMARY KEYr\  r`  rb  rc  REGEXPre  rf  rg  rh  ri  rj  rk  rl  rm  rn  rV   ro  rp  rt  ru  rv  z
SIMILAR TOrx  zSORT BYz
START WITHr|  rW   TABLESAMPLETEMPr  r  r  r  r  r   r  r  r  r  r  r   r  r  r  r  r  r  r  r   r   rc   BOOLrd   BYTEri   INT1re   INT16SHORTrg   HUGEINTUHUGEINTINT2INTEGERrk   INT4INT32INT64ro   rq   LONGrm   INT8rl   rp   rr   DECrv   rw   rx   ry   rz   r|   
BIGNUMERICr7  r:  r   rQ   NUMERICFIXEDREALrs   FLOAT4FLOAT8rt   zDOUBLE PRECISIONr   r   r}   	CHARACTERzCHAR VARYINGzCHARACTER VARYINGr~   r   VARCHAR2r   	NVARCHAR2r   STRrP   r   r   r   r   CLOBLONGVARCHARr   r   r   r   r   BYTEAr   r   r   r   r   r   TIMESTAMP_LTZr   TIMESTAMP_NTZr   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r}  rr  r   r   r  CALLr   EXPLAINr  OPTIMIZE)PREPAREVACUUMzUSER-DEFINEDzFOR VERSIONzFOR TIMESTAMPzt.Dict[str, TokenType]r!  )r  	
z"t.Dict[t.Optional[str], TokenType]r%  r&  z--)z/*r  )sqlsizetokensdialectuse_rs_tokenizer_start_current_line_col	_comments_char_end_peek_prev_token_line_rs_dialect_settingsr  r
   r  t.Optional[bool]optst.Anyr  r  c                 K  sX   ddl m} ||| _|d ur|nt| _| jr&t| jj| jj| jj	d| _
|   d S )Nr   )Dialect)unescaped_sequences identifiers_can_start_with_digit#numbers_can_be_underscore_separated)sqlglot.dialectsr  get_or_raiser  r#  r  RsTokenizerDialectSettingsUNESCAPED_SEQUENCES IDENTIFIERS_CAN_START_WITH_DIGIT#NUMBERS_CAN_BE_UNDERSCORE_SEPARATEDr  reset)r  r  r  r  r  r  r  r  r    s   zTokenizer.__init__c                 C  sL   d| _ d| _g | _d| _d| _d| _d| _g | _d| _d| _	d| _
d| _d S )Nr\  r   r  F)r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r    s   
zTokenizer.resetr  r  t.List[Token]c              
   C  s   | j r| |S |   || _t|| _z|   W | jS  tyK } z#t| j	d d}t
| j	d | jd }| j|| }td| d|d}~ww )z?Returns a list of tokens corresponding to the SQL string `sql`.2   r   r  zError tokenizing 'rU  N)r  tokenize_rsr  r  lenr  _scan	Exceptionmaxr  minr   r  )r  r  r  r  r  contextr  r  r  tokenize"  s   


zTokenizer.tokenizeuntilt.Optional[t.Callable]c                 C  s  | j rm| jsm| j}|| j k r*| j| }| r$|dks|dkr$|d7 }nn|| j k s|| jkr4|| j nd}|| _| | | j sa| j rM| 	  n| j| j
v r]| | j
| j  n|   |rg| rgn| j rm| jr| jr| jr| jd j| j d S d S d S )Nr  r  r  r  )r  r  r  r  isspacer  _advancer  isdigit_scan_numberr  _scan_identifier_scan_keywordsr  r  r  extend)r  r  currentcharoffsetr  r  r  r  5  s.   








zTokenizer._scanr  r  c                 C  s<   |dkr| j S | jd }|| }|| jkr| j|| S dS )Nr  r\  )r  r  r  r  )r  r  r  r  r  r  r  _charsU  s
   
zTokenizer._charsr  r  alnumr'  c                 C  s(  | j | jtju r| jdkr| jdks|| _|  jd7  _n|  j|7  _|  j|7  _| j| j	k| _
| j| jd  | _| j
rBdn| j| j | _|r| j r| j}| j}| j
}| j}| rz|d7 }|d7 }|| j	k}|rqdn| j| }| s`|| _|| _|| _
|| _| j|d  | _d S d S d S )Nr  r  r  r\  )r%  getr  r   rO   r  r  r  r  r  r  r  isalnum)r  r  r  r  r  r  r  r  r  r  r  ^  s6   
zTokenizer._advancec                 C  s   | j | j| j S r  )r  r  r  r  r  r  r  _text  s   zTokenizer._textr  r   r  t.Optional[str]c                   s   j  _ jr|tjkr jr jd j j g  _ jt	||d u r( j
n| j  j j jd  jd g  _| jv r jdkrt jdksV jd j jv r j}t j}  fdd  jd |  _ j| j  }|r tj| d S d S d S d S d S )Nr  r  )r  r  r  r  r  r  rM  c                     s
    j dkS )NrM  )r  r  r  r  r  <lambda>  s   
 z Tokenizer._add.<locals>.<lambda>)r  r  r  r   r#   r  r  r  appendr  r  r  r  r  r)  r  r  r  r*  r  r  strip_addrP   )r  r  r  r  r  r  r  r  r    s<   

 
zTokenizer._addc                 C  sb  d}d }| j }|}d}d}| j}|| jv }|rj|rtj}	n	t|| \}	}|	tjkr,n>|	tjkr3|}| j	| }
|d7 }|
| j
k re| j|
 }|pL|| jv }| }|rU|sb|rYd}||7 }|}d}nd}nd}n|s|r| |rsd S | |rzd S |s|s|s| |d  | }| j| j| |d d S | j| jv r| j| j| j | jd d S |   d S )Nr   Fr  r  Tr\  )r  )r  r"  r  r   PREFIXr   r  FAILEDr  r  r  r  r  _scan_string_scan_commentr  r  r!  r  	_scan_var)r  r  wordcharsr  
prev_spaceskiptriesingle_tokenresultr  is_spacer  r  r  r    s\   







zTokenizer._scan_keywordscomment_startc                 C  s  || j vrdS | j}t|}| j | }|rf| | d}t|}| jsP| ||kr1|d8 }|s1n| jdd | jrM| jsM| ||krM| | |d7 }| jr#| j| j	|| d   | |d  n+| js| j
| jtjur| jdd | js| j
| jtjuss| j| j	|d   || jkr| jr| jd j| jv r| tj || jkr| jd j| j g | _| j| _dS )NFr  Tr  r  )r  r  r  r  r  r  r-  r  r  r  r%  r  r  r   rO   r   r  r  r.  r  r  r  r  r  )r  r  comment_start_linecomment_start_sizecomment_endcomment_countcomment_end_sizer  r  r  r    sN   





zTokenizer._scan_commentc                 C  s  | j dkr,| j }|dkr| jr|  S | tjS |dkr,| jr&| 	 S | tjS d}d}	 | j
 r;|   n| jdkr[|s[| jrT| jd jtjkrT| tjS d}|   n| jd	v rm|d
krm|d
7 }|   n| j dkr|s|d
7 }|   n| j r| j}d}| j r| j| jvr|| j7 }|   | j r| j| jvs| j| j| d}|r| tj| | tjd | ||S |dd}| jjr|
 r| tj|| S | jjr| tjS | t|  | tj|S | tjS q1)N0BXFr   TrE  r  )rF  rL  r  Er\  ra  _)r  r  r  r  
_scan_bitsr  r   rQ   r  	_scan_hexr  r  r  r  rF   isidentifierr  r  r  r!  r  r&  r!   replacer  r  r  r[   r  )r  peekdecimal
scientificnumber_textliteralr  replacedr  r  r  r    sV   








zTokenizer._scan_numberc                 C  sX   |    |  }zt|d | tj|dd   W d S  ty+   | tj Y d S w )N   )r  _extract_valuer  r  r   r\   
ValueErrorrR   r  valuer  r  r  r  J     
zTokenizer._scan_bitsc                 C  sX   |    |  }zt|d | tj|dd   W d S  ty+   | tj Y d S w )N   r  )r  r  r  r  r   r]   r  rR   r  r  r  r  r  T  r  zTokenizer._scan_hexc                 C  s2   	 | j  }|r|| jvr| jdd n	 | jS q)NTr  )r  r  r  r  r  r  r  r  r  r  r  ^  s   
zTokenizer._extract_valuer  c                 C  sX  d }t j}|| jv r| j| }nf|| jv rt| j| \}}|t jkr$d}nR|t jkr,d}nJ|t jkrs|   | j|kr=d}n
| j	|d| j
 d}|rk| j
rk| jsS| sk| js[| d | t|  | | j dS | | | }ndS | t| | j	||t jkd}|rzt|| W n ty   td	| j d
| j w | || dS )Nr  r  r\  T)r  raise_unmatchedr  F)r  z0Numeric string contains invalid characters from rC  )r   rP   r  r  r]   r\   ra   r  r  _extract_stringr+  r  r  r  r  r0  r`   r  r  r   r  r  )r  r  baser  r  tagr  r  r  r  r  h  sP   






zTokenizer._scan_stringidentifier_endc                 C  s0   |    | j|| j|hB d}| tj| d S )N)escapes)r  r  r  r  r   rR   )r  r  r  r  r  r  r    s
   zTokenizer._scan_identifierc                 C  sz   	 | j  }|r|| jv s|| jvr| jdd nnq| | jr/| jd jtj	kr/tj
 d S | j| j tj
 d S )NTr  r  )r  r  r(  r  r  r  r  r  r   rF   r[   r!  r  r  r  r  r  r  r  r    s   
zTokenizer._scan_var	delimiterr  t.Optional[t.Set[str]]r  r  c           	      C  s  d}t |}|d u r| jn|}	 |s6| jjr6| jr6| j| jv r6| jj| j| j }|r6| d ||7 }q| j	s;|s| j|v r| j|ksJ| j|v r| j| j
vsV| j| jkr| j|kra|| j7 }n|| j| j 7 }| jd | jk rw| d nTtd| d| j d| j | ||kr|dkr| |d  	 |S | jr|s|| j S td| d| j d| j | jd }| jdd || j|| jd  7 }q)	Nr\  Tr  r  zMissing z from rC  r  )r  r  r  r  r  r  r  r  r  r,  r  r  r  r   r  r  r  r  r  )	r  r  r  r  r  r  
delim_sizeunescaped_sequencer  r  r  r  r    sT   




zTokenizer._extract_stringc                 C  sR   | j std| j || j\}}|D ]}t|j |_q|| _|d ur't||S )NzRust tokenizer is not available)	r2  r   r  r  _ALL_TOKEN_TYPEStoken_type_indexr  r  r   )r  r  r  	error_msgtokenr  r  r  r    s   zTokenizer.tokenize_rs)NN)r  r
   r  r  r  r  r  r  )r  r  )r  r  r  r  r  )r  r  r  r  )r  r  r  r  )r  F)r  r  r  r'  r  r  r  )r  r   r  r  r  r  )r  r  r  r'  )r  r  r  r'  )r  r  r  r  )NFT)
r  r  r  r  r  r'  r  r'  r  r  (L  r   r   r   r   r   r   r   r   r   r   r2   r8   r   r   r   r   r,   r)   r'   r?  r+   r5   r   r#   r&   r%   r$   r:   rW  rF   r>   r   r  r  __annotations__r  r  r  r  r  r  r  r  r  r(  r  r+  r[   r0  r,  r-  r   ro  r$  r  r   r.  r  r  r  r  r  r  r"  r2  r  r!   r3   r4   r*   r(   r-   r/   r.   r;   r<   r=   r?   r@   rA   rH   r"   r  r4  r  r)  r   r   r0   r   r   r   r   r   r   r   r   r   r  r   r   r   r   rT   r   r   r   r   r   r   r   r   r   r   r   r   rS   r   r   r   r   r   r  r  r  r  r   r  r  r  r  r	  r
  r  r  r  r  r  r  r  r  r   r   r  r  r  r!  r"  r   r#  r&  r%  r'  r*  r+  r,  r.  r0  r2  r3  r6  r8  r9  r>  r  rA  rB  rD  rE  r   rG  rH  r1   rI   rK  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rY  r[  r\  r`  rb  rc  rj  re  rf  rg  rh  ri  rk  rl  rm  rn  rV   rp  rt  ru  rv  rw  rx  ry  rz  r|  rW   r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r   r   rc   rd   re   ri   rg   ro   rp   rk   rm   rq   rl   rr   rv   rw   rx   ry   rz   r|   r7  r:  r   rs   rt   r   r   r}   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r}  rr  r   r   r  r   r   r  r   r  r  r!  rN   rO   r%  r)  r*  r&  r  r  r  r  r  r  r  r  propertyr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   Z  s  
 	
"	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~                 	  
                                               !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?  @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _  `  a  b  c  d  e  f  g  h  i  j  k  
l  
m  
n  
o  
p  
q  
r  
s  
t  
u  
v  
w  
x  
y  
z  
{  
|  
}  
~  
   
    
    
    
    
    
    
    
    
	    

    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
     
!    
"    
#    
$    
%    
&    
'    (    0 	!#:73


38r   )	metaclass)(
__future__r   ostypingtenumr   sqlglot.errorsr   r   sqlglot.helperr   sqlglot.trier   r   r	   TYPE_CHECKINGsqlglot.dialects.dialectr
   	sqlglotrsr   r1  r   r  r   r$  r   r/  environr  r#  ImportErrorr   listr  	enumerater  r  typer  r  r  r  r  <module>   s0       9k