sqlglot.dialects.snowflake
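Dialect module implementing Snowflake support. The full module source follows. As a quick orientation, here is a minimal usage sketch (not part of the module itself) that exercises it through sqlglot's public API; the query and identifiers are made-up examples:

import sqlglot

# IFF(...) is registered in Parser.FUNCTIONS below and parses into exp.If,
# so the same AST can be re-rendered for another dialect.
ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg') FROM t", read="snowflake")

# Render the AST as DuckDB SQL (IFF comes out as a CASE expression there).
print(ast.sql(dialect="duckdb"))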
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_safe_divide_sql,
    no_timestamp_sql,
    timestampdiff_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value and not safe:
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)).and_(
        exp.Is(this=seq_get(args, 0), expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for unnest in expression.find_all(exp.Unnest):
            if (
                isinstance(unnest.parent, (exp.From, exp.Join))
                and len(unnest.expressions) == 1
                and isinstance(unnest.expressions[0], exp.GenerateDateArray)
            ):
                generate_date_array = unnest.expressions[0]
                start = generate_date_array.args.get("start")
                end = generate_date_array.args.get("end")
                step = generate_date_array.args.get("step")

                if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
                    continue

                unit = step.args.get("unit")

                unnest_alias = unnest.args.get("alias")
                if unnest_alias:
                    unnest_alias = unnest_alias.copy()
                    sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
                else:
                    sequence_value_name = "value"

                # We'll add the next sequence value to the starting date and project the result
                date_add = _build_date_time_add(exp.DateAdd)(
                    [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
                ).as_(sequence_value_name)

                # We use DATEDIFF to compute the number of sequence values needed
                number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
                    [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
                )

                unnest.set("expressions", [number_sequence])
                unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                parameters=seq_get(args, 4),
                group=seq_get(args, 5) or exp.Literal.number(0),
            ),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS,
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as a closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _unnest_generate_date_array,
                ]
            ),
            exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")

            # To avoid generating all these default values, we set group to None if
            # it's 0 (also the default value), which doesn't trigger the following chain
            if group and group.name == "0":
                group = None

            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )
rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 935 936 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 937 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 938 return self.func("TO_GEOGRAPHY", expression.this) 939 if expression.is_type(exp.DataType.Type.GEOMETRY): 940 return self.func("TO_GEOMETRY", expression.this) 941 942 return super().cast_sql(expression, safe_prefix=safe_prefix) 943 944 def trycast_sql(self, expression: exp.TryCast) -> str: 945 value = expression.this 946 947 if value.type is None: 948 from sqlglot.optimizer.annotate_types import annotate_types 949 950 value = annotate_types(value) 951 952 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 953 return super().trycast_sql(expression) 954 955 # TRY_CAST only works for string values in Snowflake 956 return self.cast_sql(expression) 957 958 def log_sql(self, expression: exp.Log) -> str: 959 if not expression.expression: 960 return self.func("LN", expression.this) 961 962 return super().log_sql(expression) 963 964 def unnest_sql(self, expression: exp.Unnest) -> str: 965 unnest_alias = expression.args.get("alias") 966 offset = expression.args.get("offset") 967 968 columns = [ 969 exp.to_identifier("seq"), 970 exp.to_identifier("key"), 971 exp.to_identifier("path"), 972 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 973 seq_get(unnest_alias.columns if unnest_alias else [], 0) 974 or exp.to_identifier("value"), 975 exp.to_identifier("this"), 976 ] 977 978 if unnest_alias: 979 unnest_alias.set("columns", columns) 980 else: 981 unnest_alias = exp.TableAlias(this="_u", columns=columns) 982 983 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 984 alias = self.sql(unnest_alias) 985 alias = f" AS {alias}" if alias else "" 986 return f"{explode}{alias}" 987 988 def show_sql(self, expression: exp.Show) -> str: 989 terse = "TERSE " if expression.args.get("terse") else "" 990 history = " HISTORY" if expression.args.get("history") else "" 991 like = self.sql(expression, "like") 992 like = f" LIKE {like}" if like else "" 993 994 scope = self.sql(expression, "scope") 995 scope = f" {scope}" if scope else "" 996 997 scope_kind = self.sql(expression, "scope_kind") 998 if scope_kind: 999 scope_kind = f" IN {scope_kind}" 1000 1001 starts_with = self.sql(expression, "starts_with") 1002 if starts_with: 1003 starts_with = f" STARTS WITH {starts_with}" 1004 1005 limit = self.sql(expression, "limit") 1006 1007 from_ = self.sql(expression, "from") 1008 if from_: 1009 from_ = f" FROM {from_}" 1010 1011 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1012 1013 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1014 # Other dialects don't support all of the following parameters, so we need to 1015 # generate default values as necessary to ensure the transpilation is correct 1016 group = expression.args.get("group") 1017 1018 # To avoid generating all these default values, we set group to None if 1019 # it's 0 (also default value) which doesn't trigger the following chain 1020 if group and group.name == "0": 1021 group = None 1022 1023 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1024 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1025 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1026 1027 return self.func( 1028 "REGEXP_SUBSTR", 1029 
expression.this, 1030 expression.expression, 1031 position, 1032 occurrence, 1033 parameters, 1034 group, 1035 ) 1036 1037 def describe_sql(self, expression: exp.Describe) -> str: 1038 # Default to table if kind is unknown 1039 kind_value = expression.args.get("kind") or "TABLE" 1040 kind = f" {kind_value}" if kind_value else "" 1041 this = f" {self.sql(expression, 'this')}" 1042 expressions = self.expressions(expression, flat=True) 1043 expressions = f" {expressions}" if expressions else "" 1044 return f"DESCRIBE{kind}{this}{expressions}" 1045 1046 def generatedasidentitycolumnconstraint_sql( 1047 self, expression: exp.GeneratedAsIdentityColumnConstraint 1048 ) -> str: 1049 start = expression.args.get("start") 1050 start = f" START {start}" if start else "" 1051 increment = expression.args.get("increment") 1052 increment = f" INCREMENT {increment}" if increment else "" 1053 return f"AUTOINCREMENT{start}{increment}" 1054 1055 def cluster_sql(self, expression: exp.Cluster) -> str: 1056 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1057 1058 def struct_sql(self, expression: exp.Struct) -> str: 1059 keys = [] 1060 values = [] 1061 1062 for i, e in enumerate(expression.expressions): 1063 if isinstance(e, exp.PropertyEQ): 1064 keys.append( 1065 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1066 ) 1067 values.append(e.expression) 1068 else: 1069 keys.append(exp.Literal.string(f"_{i}")) 1070 values.append(e) 1071 1072 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1073 1074 @unsupported_args("weight", "accuracy") 1075 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1076 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1077 1078 def alterset_sql(self, expression: exp.AlterSet) -> str: 1079 exprs = self.expressions(expression, flat=True) 1080 exprs = f" {exprs}" if exprs else "" 1081 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1082 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1083 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1084 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1085 tag = self.expressions(expression, key="tag", flat=True) 1086 tag = f" TAG {tag}" if tag else "" 1087 1088 return f"SET{exprs}{file_format}{copy_options}{tag}" 1089 1090 def strtotime_sql(self, expression: exp.StrToTime): 1091 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1092 return self.func( 1093 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1094 ) 1095 1096 def timestampsub_sql(self, expression: exp.TimestampSub): 1097 return self.sql( 1098 exp.TimestampAdd( 1099 this=expression.this, 1100 expression=expression.expression * -1, 1101 unit=expression.unit, 1102 ) 1103 ) 1104 1105 def jsonextract_sql(self, expression: exp.JSONExtract): 1106 this = expression.this 1107 1108 # JSON strings are valid coming from other dialects such as BQ 1109 return self.func( 1110 "GET_PATH", 1111 exp.ParseJSON(this=this) if this.is_string else this, 1112 expression.expression, 1113 )
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS (
  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
  SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
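A minimal sketch of how this behaves through the public API, assuming the rewrite is applied in the optimizer's qualify step (the exact printed output may vary by version):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

# Qualifying under the Snowflake dialect should push the CTE alias column `c`
# onto the SUM(a) projection, per the flag described above
expression = sqlglot.parse_one(sql, read="snowflake")
print(qualify(expression, dialect="snowflake").sql(dialect="snowflake"))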
Associates this dialect's time formats with their equivalent Python strftime formats.
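For instance, Snowflake format tokens are translated through this mapping when transpiling to a strftime-based dialect; a small sketch (the DuckDB output shape is illustrative, and the specific mapping keys are assumptions that may vary by version):

import sqlglot
from sqlglot.dialects.snowflake import Snowflake

# Peek at a couple of format tokens (assumed keys)
print(Snowflake.TIME_MAPPING.get("yyyy"), Snowflake.TIME_MAPPING.get("mm"))

# TO_CHAR's format string is rewritten token by token via TIME_MAPPING
print(sqlglot.transpile("SELECT TO_CHAR(col, 'yyyy-mm-dd')", read="snowflake", write="duckdb")[0])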
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
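A short sketch of the DUAL carve-out above; even when identify forces quoting, the DUAL table name is left bare so Snowflake keeps treating it as the special keyword:

import sqlglot

# Expected shape: SELECT "a" FROM DUAL (DUAL stays unquoted)
print(sqlglot.transpile("SELECT a FROM DUAL", read="snowflake", write="snowflake", identify=True)[0])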
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
292 class Parser(parser.Parser): 293 IDENTIFY_PIVOT_STRINGS = True 294 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 295 COLON_IS_VARIANT_EXTRACT = True 296 297 ID_VAR_TOKENS = { 298 *parser.Parser.ID_VAR_TOKENS, 299 TokenType.MATCH_CONDITION, 300 } 301 302 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 303 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 304 305 FUNCTIONS = { 306 **parser.Parser.FUNCTIONS, 307 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 308 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 309 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 310 this=seq_get(args, 1), expression=seq_get(args, 0) 311 ), 312 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 313 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 314 start=seq_get(args, 0), 315 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 316 step=seq_get(args, 2), 317 ), 318 "BITXOR": binary_from_function(exp.BitwiseXor), 319 "BIT_XOR": binary_from_function(exp.BitwiseXor), 320 "BOOLXOR": binary_from_function(exp.Xor), 321 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 322 "DATE_TRUNC": _date_trunc_to_time, 323 "DATEADD": _build_date_time_add(exp.DateAdd), 324 "DATEDIFF": _build_datediff, 325 "DIV0": _build_if_from_div0, 326 "FLATTEN": exp.Explode.from_arg_list, 327 "GET_PATH": lambda args, dialect: exp.JSONExtract( 328 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 329 ), 330 "IFF": exp.If.from_arg_list, 331 "LAST_DAY": lambda args: exp.LastDay( 332 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 333 ), 334 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 335 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 336 "LISTAGG": exp.GroupConcat.from_arg_list, 337 "NULLIFZERO": _build_if_from_nullifzero, 338 "OBJECT_CONSTRUCT": _build_object_construct, 339 "REGEXP_REPLACE": _build_regexp_replace, 340 "REGEXP_SUBSTR": lambda args: exp.RegexpExtract( 341 this=seq_get(args, 0), 342 expression=seq_get(args, 1), 343 position=seq_get(args, 2), 344 occurrence=seq_get(args, 3), 345 parameters=seq_get(args, 4), 346 group=seq_get(args, 5) or exp.Literal.number(0), 347 ), 348 "RLIKE": exp.RegexpLike.from_arg_list, 349 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 350 "TIMEADD": _build_date_time_add(exp.TimeAdd), 351 "TIMEDIFF": _build_datediff, 352 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 353 "TIMESTAMPDIFF": _build_datediff, 354 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 355 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 356 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 357 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 358 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 359 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 360 "TRY_TO_TIMESTAMP": _build_datetime( 361 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 362 ), 363 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 364 "TO_NUMBER": lambda args: exp.ToNumber( 365 this=seq_get(args, 0), 366 format=seq_get(args, 1), 367 precision=seq_get(args, 2), 368 scale=seq_get(args, 3), 369 ), 370 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 371 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 372 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 373 
"TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 374 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 375 "TO_VARCHAR": exp.ToChar.from_arg_list, 376 "ZEROIFNULL": _build_if_from_zeroifnull, 377 } 378 379 FUNCTION_PARSERS = { 380 **parser.Parser.FUNCTION_PARSERS, 381 "DATE_PART": lambda self: self._parse_date_part(), 382 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 383 } 384 FUNCTION_PARSERS.pop("TRIM") 385 386 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 387 388 RANGE_PARSERS = { 389 **parser.Parser.RANGE_PARSERS, 390 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 391 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 392 } 393 394 ALTER_PARSERS = { 395 **parser.Parser.ALTER_PARSERS, 396 "UNSET": lambda self: self.expression( 397 exp.Set, 398 tag=self._match_text_seq("TAG"), 399 expressions=self._parse_csv(self._parse_id_var), 400 unset=True, 401 ), 402 } 403 404 STATEMENT_PARSERS = { 405 **parser.Parser.STATEMENT_PARSERS, 406 TokenType.SHOW: lambda self: self._parse_show(), 407 } 408 409 PROPERTY_PARSERS = { 410 **parser.Parser.PROPERTY_PARSERS, 411 "LOCATION": lambda self: self._parse_location_property(), 412 } 413 414 TYPE_CONVERTERS = { 415 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 416 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 417 } 418 419 SHOW_PARSERS = { 420 "SCHEMAS": _show_parser("SCHEMAS"), 421 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 422 "OBJECTS": _show_parser("OBJECTS"), 423 "TERSE OBJECTS": _show_parser("OBJECTS"), 424 "TABLES": _show_parser("TABLES"), 425 "TERSE TABLES": _show_parser("TABLES"), 426 "VIEWS": _show_parser("VIEWS"), 427 "TERSE VIEWS": _show_parser("VIEWS"), 428 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 429 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 430 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 431 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 432 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 433 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 434 "SEQUENCES": _show_parser("SEQUENCES"), 435 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 436 "COLUMNS": _show_parser("COLUMNS"), 437 "USERS": _show_parser("USERS"), 438 "TERSE USERS": _show_parser("USERS"), 439 } 440 441 CONSTRAINT_PARSERS = { 442 **parser.Parser.CONSTRAINT_PARSERS, 443 "WITH": lambda self: self._parse_with_constraint(), 444 "MASKING": lambda self: self._parse_with_constraint(), 445 "PROJECTION": lambda self: self._parse_with_constraint(), 446 "TAG": lambda self: self._parse_with_constraint(), 447 } 448 449 STAGED_FILE_SINGLE_TOKENS = { 450 TokenType.DOT, 451 TokenType.MOD, 452 TokenType.SLASH, 453 } 454 455 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 456 457 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 458 459 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 460 461 LAMBDAS = { 462 **parser.Parser.LAMBDAS, 463 TokenType.ARROW: lambda self, expressions: self.expression( 464 exp.Lambda, 465 this=self._replace_lambda( 466 self._parse_assignment(), 467 expressions, 468 ), 469 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 470 ), 471 } 472 473 def _negate_range( 474 self, this: t.Optional[exp.Expression] = None 475 ) -> t.Optional[exp.Expression]: 476 if not this: 477 return this 478 479 query = this.args.get("query") 480 if isinstance(this, exp.In) 
and isinstance(query, exp.Query): 481 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 482 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 483 # which can produce different results (most likely a SnowFlake bug). 484 # 485 # https://docs.snowflake.com/en/sql-reference/functions/in 486 # Context: https://github.com/tobymao/sqlglot/issues/3890 487 return self.expression( 488 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 489 ) 490 491 return self.expression(exp.Not, this=this) 492 493 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 494 if self._prev.token_type != TokenType.WITH: 495 self._retreat(self._index - 1) 496 497 if self._match_text_seq("MASKING", "POLICY"): 498 policy = self._parse_column() 499 return self.expression( 500 exp.MaskingPolicyColumnConstraint, 501 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 502 expressions=self._match(TokenType.USING) 503 and self._parse_wrapped_csv(self._parse_id_var), 504 ) 505 if self._match_text_seq("PROJECTION", "POLICY"): 506 policy = self._parse_column() 507 return self.expression( 508 exp.ProjectionPolicyColumnConstraint, 509 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 510 ) 511 if self._match(TokenType.TAG): 512 return self.expression( 513 exp.TagColumnConstraint, 514 expressions=self._parse_wrapped_csv(self._parse_property), 515 ) 516 517 return None 518 519 def _parse_create(self) -> exp.Create | exp.Command: 520 expression = super()._parse_create() 521 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 522 # Replace the Table node with the enclosed Identifier 523 expression.this.replace(expression.this.this) 524 525 return expression 526 527 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 528 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 529 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 530 this = self._parse_var() or self._parse_type() 531 532 if not this: 533 return None 534 535 self._match(TokenType.COMMA) 536 expression = self._parse_bitwise() 537 this = map_date_part(this) 538 name = this.name.upper() 539 540 if name.startswith("EPOCH"): 541 if name == "EPOCH_MILLISECOND": 542 scale = 10**3 543 elif name == "EPOCH_MICROSECOND": 544 scale = 10**6 545 elif name == "EPOCH_NANOSECOND": 546 scale = 10**9 547 else: 548 scale = None 549 550 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 551 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 552 553 if scale: 554 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 555 556 return to_unix 557 558 return self.expression(exp.Extract, this=this, expression=expression) 559 560 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 561 if is_map: 562 # Keys are strings in Snowflake's objects, see also: 563 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 564 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 565 return self._parse_slice(self._parse_string()) 566 567 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 568 569 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 570 lateral = super()._parse_lateral() 571 if not lateral: 572 return lateral 573 574 if isinstance(lateral.this, exp.Explode): 575 table_alias = 
lateral.args.get("alias") 576 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 577 if table_alias and not table_alias.args.get("columns"): 578 table_alias.set("columns", columns) 579 elif not table_alias: 580 exp.alias_(lateral, "_flattened", table=columns, copy=False) 581 582 return lateral 583 584 def _parse_table_parts( 585 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 586 ) -> exp.Table: 587 # https://docs.snowflake.com/en/user-guide/querying-stage 588 if self._match(TokenType.STRING, advance=False): 589 table = self._parse_string() 590 elif self._match_text_seq("@", advance=False): 591 table = self._parse_location_path() 592 else: 593 table = None 594 595 if table: 596 file_format = None 597 pattern = None 598 599 wrapped = self._match(TokenType.L_PAREN) 600 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 601 if self._match_text_seq("FILE_FORMAT", "=>"): 602 file_format = self._parse_string() or super()._parse_table_parts( 603 is_db_reference=is_db_reference 604 ) 605 elif self._match_text_seq("PATTERN", "=>"): 606 pattern = self._parse_string() 607 else: 608 break 609 610 self._match(TokenType.COMMA) 611 612 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 613 else: 614 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 615 616 return table 617 618 def _parse_id_var( 619 self, 620 any_token: bool = True, 621 tokens: t.Optional[t.Collection[TokenType]] = None, 622 ) -> t.Optional[exp.Expression]: 623 if self._match_text_seq("IDENTIFIER", "("): 624 identifier = ( 625 super()._parse_id_var(any_token=any_token, tokens=tokens) 626 or self._parse_string() 627 ) 628 self._match_r_paren() 629 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 630 631 return super()._parse_id_var(any_token=any_token, tokens=tokens) 632 633 def _parse_show_snowflake(self, this: str) -> exp.Show: 634 scope = None 635 scope_kind = None 636 637 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 638 # which is syntactically valid but has no effect on the output 639 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 640 641 history = self._match_text_seq("HISTORY") 642 643 like = self._parse_string() if self._match(TokenType.LIKE) else None 644 645 if self._match(TokenType.IN): 646 if self._match_text_seq("ACCOUNT"): 647 scope_kind = "ACCOUNT" 648 elif self._match_set(self.DB_CREATABLES): 649 scope_kind = self._prev.text.upper() 650 if self._curr: 651 scope = self._parse_table_parts() 652 elif self._curr: 653 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 654 scope = self._parse_table_parts() 655 656 return self.expression( 657 exp.Show, 658 **{ 659 "terse": terse, 660 "this": this, 661 "history": history, 662 "like": like, 663 "scope": scope, 664 "scope_kind": scope_kind, 665 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 666 "limit": self._parse_limit(), 667 "from": self._parse_string() if self._match(TokenType.FROM) else None, 668 }, 669 ) 670 671 def _parse_location_property(self) -> exp.LocationProperty: 672 self._match(TokenType.EQ) 673 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 674 675 def _parse_file_location(self) -> t.Optional[exp.Expression]: 676 # Parse either a subquery or a staged file 677 return ( 678 self._parse_select(table=True, parse_subquery_alias=False) 679 if self._match(TokenType.L_PAREN, advance=False) 680 
else self._parse_table_parts() 681 ) 682 683 def _parse_location_path(self) -> exp.Var: 684 parts = [self._advance_any(ignore_reserved=True)] 685 686 # We avoid consuming a comma token because external tables like @foo and @bar 687 # can be joined in a query with a comma separator, as well as closing paren 688 # in case of subqueries 689 while self._is_connected() and not self._match_set( 690 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 691 ): 692 parts.append(self._advance_any(ignore_reserved=True)) 693 694 return exp.var("".join(part.text for part in parts if part)) 695 696 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 697 this = super()._parse_lambda_arg() 698 699 if not this: 700 return this 701 702 typ = self._parse_types() 703 704 if typ: 705 return self.expression(exp.Cast, this=this, to=typ) 706 707 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
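As a usage sketch, Snowflake-specific syntax such as ':' variant extraction (COLON_IS_VARIANT_EXTRACT above) flows through this parser; the transpiled output shape is illustrative:

import sqlglot

# `payload:user.name` parses as a variant/JSON path, not a cast or slice
expression = sqlglot.parse_one("SELECT payload:user.name FROM events", read="snowflake")
print(expression.sql(dialect="duckdb"))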
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
709 class Tokenizer(tokens.Tokenizer): 710 STRING_ESCAPES = ["\\", "'"] 711 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 712 RAW_STRINGS = ["$$"] 713 COMMENTS = ["--", "//", ("/*", "*/")] 714 NESTED_COMMENTS = False 715 716 KEYWORDS = { 717 **tokens.Tokenizer.KEYWORDS, 718 "BYTEINT": TokenType.INT, 719 "CHAR VARYING": TokenType.VARCHAR, 720 "CHARACTER VARYING": TokenType.VARCHAR, 721 "EXCLUDE": TokenType.EXCEPT, 722 "ILIKE ANY": TokenType.ILIKE_ANY, 723 "LIKE ANY": TokenType.LIKE_ANY, 724 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 725 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 726 "MINUS": TokenType.EXCEPT, 727 "NCHAR VARYING": TokenType.VARCHAR, 728 "PUT": TokenType.COMMAND, 729 "REMOVE": TokenType.COMMAND, 730 "RM": TokenType.COMMAND, 731 "SAMPLE": TokenType.TABLE_SAMPLE, 732 "SQL_DOUBLE": TokenType.DOUBLE, 733 "SQL_VARCHAR": TokenType.VARCHAR, 734 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 735 "TAG": TokenType.TAG, 736 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 737 "TOP": TokenType.TOP, 738 "WAREHOUSE": TokenType.WAREHOUSE, 739 "STREAMLIT": TokenType.STREAMLIT, 740 } 741 KEYWORDS.pop("/*+") 742 743 SINGLE_TOKENS = { 744 **tokens.Tokenizer.SINGLE_TOKENS, 745 "$": TokenType.PARAMETER, 746 } 747 748 VAR_SINGLE_TOKENS = {"$"} 749 750 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
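A small sketch of the tokenizer's Snowflake-specific rules, using the dialect-level tokenize helper: '$$' delimits raw strings and '//' starts a line comment, per RAW_STRINGS and COMMENTS above.

from sqlglot.dialects.snowflake import Snowflake

# The $$...$$ body is tokenized as a raw string; the // comment is attached
# to a neighboring token rather than emitted separately
for token in Snowflake().tokenize("SELECT $$C:\\new\\path$$ // trailing comment"):
    print(token.token_type, repr(token.text))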
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
752 class Generator(generator.Generator): 753 PARAMETER_TOKEN = "$" 754 MATCHED_BY_SOURCE = False 755 SINGLE_STRING_INTERVAL = True 756 JOIN_HINTS = False 757 TABLE_HINTS = False 758 QUERY_HINTS = False 759 AGGREGATE_FILTER_SUPPORTED = False 760 SUPPORTS_TABLE_COPY = False 761 COLLATE_IS_FUNC = True 762 LIMIT_ONLY_LITERALS = True 763 JSON_KEY_VALUE_PAIR_SEP = "," 764 INSERT_OVERWRITE = " OVERWRITE INTO" 765 STRUCT_DELIMITER = ("(", ")") 766 COPY_PARAMS_ARE_WRAPPED = False 767 COPY_PARAMS_EQ_REQUIRED = True 768 STAR_EXCEPT = "EXCLUDE" 769 SUPPORTS_EXPLODING_PROJECTIONS = False 770 ARRAY_CONCAT_IS_VAR_LEN = False 771 SUPPORTS_CONVERT_TIMEZONE = True 772 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 773 SUPPORTS_MEDIAN = True 774 775 TRANSFORMS = { 776 **generator.Generator.TRANSFORMS, 777 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 778 exp.ArgMax: rename_func("MAX_BY"), 779 exp.ArgMin: rename_func("MIN_BY"), 780 exp.Array: inline_array_sql, 781 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 782 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 783 exp.AtTimeZone: lambda self, e: self.func( 784 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 785 ), 786 exp.BitwiseXor: rename_func("BITXOR"), 787 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 788 exp.DateAdd: date_delta_sql("DATEADD"), 789 exp.DateDiff: date_delta_sql("DATEDIFF"), 790 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 791 exp.DatetimeDiff: timestampdiff_sql, 792 exp.DateStrToDate: datestrtodate_sql, 793 exp.DayOfMonth: rename_func("DAYOFMONTH"), 794 exp.DayOfWeek: rename_func("DAYOFWEEK"), 795 exp.DayOfYear: rename_func("DAYOFYEAR"), 796 exp.Explode: rename_func("FLATTEN"), 797 exp.Extract: rename_func("DATE_PART"), 798 exp.FromTimeZone: lambda self, e: self.func( 799 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 800 ), 801 exp.GenerateSeries: lambda self, e: self.func( 802 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 803 ), 804 exp.GroupConcat: rename_func("LISTAGG"), 805 exp.If: if_sql(name="IFF", false_value="NULL"), 806 exp.JSONExtractScalar: lambda self, e: self.func( 807 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 808 ), 809 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 810 exp.JSONPathRoot: lambda *_: "", 811 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 812 exp.LogicalOr: rename_func("BOOLOR_AGG"), 813 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 814 exp.Max: max_or_greatest, 815 exp.Min: min_or_least, 816 exp.ParseJSON: lambda self, e: self.func( 817 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 818 ), 819 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 820 exp.PercentileCont: transforms.preprocess( 821 [transforms.add_within_group_for_percentiles] 822 ), 823 exp.PercentileDisc: transforms.preprocess( 824 [transforms.add_within_group_for_percentiles] 825 ), 826 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 827 exp.RegexpILike: _regexpilike_sql, 828 exp.Rand: rename_func("RANDOM"), 829 exp.Select: transforms.preprocess( 830 [ 831 transforms.eliminate_distinct_on, 832 transforms.explode_to_unnest(), 833 transforms.eliminate_semi_and_anti_joins, 834 _unnest_generate_date_array, 835 ] 836 ), 837 exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"), 838 exp.SHA: rename_func("SHA1"), 839 exp.StarMap: 
rename_func("OBJECT_CONSTRUCT"), 840 exp.StartsWith: rename_func("STARTSWITH"), 841 exp.StrPosition: lambda self, e: self.func( 842 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 843 ), 844 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 845 exp.Stuff: rename_func("INSERT"), 846 exp.TimeAdd: date_delta_sql("TIMEADD"), 847 exp.Timestamp: no_timestamp_sql, 848 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 849 exp.TimestampDiff: lambda self, e: self.func( 850 "TIMESTAMPDIFF", e.unit, e.expression, e.this 851 ), 852 exp.TimestampTrunc: timestamptrunc_sql(), 853 exp.TimeStrToTime: timestrtotime_sql, 854 exp.TimeToStr: lambda self, e: self.func( 855 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 856 ), 857 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 858 exp.ToArray: rename_func("TO_ARRAY"), 859 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 860 exp.ToDouble: rename_func("TO_DOUBLE"), 861 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 862 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 863 exp.TsOrDsToDate: lambda self, e: self.func( 864 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 865 ), 866 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 867 exp.Uuid: rename_func("UUID_STRING"), 868 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 869 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 870 exp.Xor: rename_func("BOOLXOR"), 871 } 872 873 SUPPORTED_JSON_PATH_PARTS = { 874 exp.JSONPathKey, 875 exp.JSONPathRoot, 876 exp.JSONPathSubscript, 877 } 878 879 TYPE_MAPPING = { 880 **generator.Generator.TYPE_MAPPING, 881 exp.DataType.Type.NESTED: "OBJECT", 882 exp.DataType.Type.STRUCT: "OBJECT", 883 } 884 885 PROPERTIES_LOCATION = { 886 **generator.Generator.PROPERTIES_LOCATION, 887 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 888 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 889 } 890 891 UNSUPPORTED_VALUES_EXPRESSIONS = { 892 exp.Map, 893 exp.StarMap, 894 exp.Struct, 895 exp.VarMap, 896 } 897 898 def with_properties(self, properties: exp.Properties) -> str: 899 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 900 901 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 902 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 903 values_as_table = False 904 905 return super().values_sql(expression, values_as_table=values_as_table) 906 907 def datatype_sql(self, expression: exp.DataType) -> str: 908 expressions = expression.expressions 909 if ( 910 expressions 911 and expression.is_type(*exp.DataType.STRUCT_TYPES) 912 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 913 ): 914 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 915 return "OBJECT" 916 917 return super().datatype_sql(expression) 918 919 def tonumber_sql(self, expression: exp.ToNumber) -> str: 920 return self.func( 921 "TO_NUMBER", 922 expression.this, 923 expression.args.get("format"), 924 expression.args.get("precision"), 925 expression.args.get("scale"), 926 ) 927 928 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 929 milli = expression.args.get("milli") 930 if milli is not None: 931 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 932 expression.set("nano", milli_to_nano) 933 934 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 935 936 def cast_sql(self, expression: exp.Cast, 
safe_prefix: t.Optional[str] = None) -> str: 937 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 938 return self.func("TO_GEOGRAPHY", expression.this) 939 if expression.is_type(exp.DataType.Type.GEOMETRY): 940 return self.func("TO_GEOMETRY", expression.this) 941 942 return super().cast_sql(expression, safe_prefix=safe_prefix) 943 944 def trycast_sql(self, expression: exp.TryCast) -> str: 945 value = expression.this 946 947 if value.type is None: 948 from sqlglot.optimizer.annotate_types import annotate_types 949 950 value = annotate_types(value) 951 952 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 953 return super().trycast_sql(expression) 954 955 # TRY_CAST only works for string values in Snowflake 956 return self.cast_sql(expression) 957 958 def log_sql(self, expression: exp.Log) -> str: 959 if not expression.expression: 960 return self.func("LN", expression.this) 961 962 return super().log_sql(expression) 963 964 def unnest_sql(self, expression: exp.Unnest) -> str: 965 unnest_alias = expression.args.get("alias") 966 offset = expression.args.get("offset") 967 968 columns = [ 969 exp.to_identifier("seq"), 970 exp.to_identifier("key"), 971 exp.to_identifier("path"), 972 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 973 seq_get(unnest_alias.columns if unnest_alias else [], 0) 974 or exp.to_identifier("value"), 975 exp.to_identifier("this"), 976 ] 977 978 if unnest_alias: 979 unnest_alias.set("columns", columns) 980 else: 981 unnest_alias = exp.TableAlias(this="_u", columns=columns) 982 983 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 984 alias = self.sql(unnest_alias) 985 alias = f" AS {alias}" if alias else "" 986 return f"{explode}{alias}" 987 988 def show_sql(self, expression: exp.Show) -> str: 989 terse = "TERSE " if expression.args.get("terse") else "" 990 history = " HISTORY" if expression.args.get("history") else "" 991 like = self.sql(expression, "like") 992 like = f" LIKE {like}" if like else "" 993 994 scope = self.sql(expression, "scope") 995 scope = f" {scope}" if scope else "" 996 997 scope_kind = self.sql(expression, "scope_kind") 998 if scope_kind: 999 scope_kind = f" IN {scope_kind}" 1000 1001 starts_with = self.sql(expression, "starts_with") 1002 if starts_with: 1003 starts_with = f" STARTS WITH {starts_with}" 1004 1005 limit = self.sql(expression, "limit") 1006 1007 from_ = self.sql(expression, "from") 1008 if from_: 1009 from_ = f" FROM {from_}" 1010 1011 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1012 1013 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1014 # Other dialects don't support all of the following parameters, so we need to 1015 # generate default values as necessary to ensure the transpilation is correct 1016 group = expression.args.get("group") 1017 1018 # To avoid generating all these default values, we set group to None if 1019 # it's 0 (also default value) which doesn't trigger the following chain 1020 if group and group.name == "0": 1021 group = None 1022 1023 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1024 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1025 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1026 1027 return self.func( 1028 "REGEXP_SUBSTR", 1029 expression.this, 1030 expression.expression, 1031 position, 1032 occurrence, 1033 parameters, 1034 
group, 1035 ) 1036 1037 def describe_sql(self, expression: exp.Describe) -> str: 1038 # Default to table if kind is unknown 1039 kind_value = expression.args.get("kind") or "TABLE" 1040 kind = f" {kind_value}" if kind_value else "" 1041 this = f" {self.sql(expression, 'this')}" 1042 expressions = self.expressions(expression, flat=True) 1043 expressions = f" {expressions}" if expressions else "" 1044 return f"DESCRIBE{kind}{this}{expressions}" 1045 1046 def generatedasidentitycolumnconstraint_sql( 1047 self, expression: exp.GeneratedAsIdentityColumnConstraint 1048 ) -> str: 1049 start = expression.args.get("start") 1050 start = f" START {start}" if start else "" 1051 increment = expression.args.get("increment") 1052 increment = f" INCREMENT {increment}" if increment else "" 1053 return f"AUTOINCREMENT{start}{increment}" 1054 1055 def cluster_sql(self, expression: exp.Cluster) -> str: 1056 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1057 1058 def struct_sql(self, expression: exp.Struct) -> str: 1059 keys = [] 1060 values = [] 1061 1062 for i, e in enumerate(expression.expressions): 1063 if isinstance(e, exp.PropertyEQ): 1064 keys.append( 1065 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1066 ) 1067 values.append(e.expression) 1068 else: 1069 keys.append(exp.Literal.string(f"_{i}")) 1070 values.append(e) 1071 1072 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1073 1074 @unsupported_args("weight", "accuracy") 1075 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1076 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1077 1078 def alterset_sql(self, expression: exp.AlterSet) -> str: 1079 exprs = self.expressions(expression, flat=True) 1080 exprs = f" {exprs}" if exprs else "" 1081 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1082 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1083 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1084 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1085 tag = self.expressions(expression, key="tag", flat=True) 1086 tag = f" TAG {tag}" if tag else "" 1087 1088 return f"SET{exprs}{file_format}{copy_options}{tag}" 1089 1090 def strtotime_sql(self, expression: exp.StrToTime): 1091 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1092 return self.func( 1093 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1094 ) 1095 1096 def timestampsub_sql(self, expression: exp.TimestampSub): 1097 return self.sql( 1098 exp.TimestampAdd( 1099 this=expression.this, 1100 expression=expression.expression * -1, 1101 unit=expression.unit, 1102 ) 1103 ) 1104 1105 def jsonextract_sql(self, expression: exp.JSONExtract): 1106 this = expression.this 1107 1108 # JSON strings are valid coming from other dialects such as BQ 1109 return self.func( 1110 "GET_PATH", 1111 exp.ParseJSON(this=this) if this.is_string else this, 1112 expression.expression, 1113 )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
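A minimal round-trip through this generator; the IFF spelling comes from the exp.If entry in TRANSFORMS above (output shape assumed):

import sqlglot

# Expected shape: SELECT IFF(x > 0, 1, NULL) FROM t
print(sqlglot.transpile("SELECT IF(x > 0, 1, NULL) FROM t", read="duckdb", write="snowflake")[0])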
907 def datatype_sql(self, expression: exp.DataType) -> str: 908 expressions = expression.expressions 909 if ( 910 expressions 911 and expression.is_type(*exp.DataType.STRUCT_TYPES) 912 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 913 ): 914 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 915 return "OBJECT" 916 917 return super().datatype_sql(expression)
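For example, a parameterized struct type should collapse to a bare OBJECT; a sketch, with BigQuery used only as a convenient source dialect:

import sqlglot

# Typed STRUCT fields can't be expressed here, so the type collapses to OBJECT
# Expected shape: SELECT CAST(x AS OBJECT)
print(sqlglot.transpile("SELECT CAST(x AS STRUCT<a INT64>)", read="bigquery", write="snowflake")[0])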
928 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 929 milli = expression.args.get("milli") 930 if milli is not None: 931 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 932 expression.set("nano", milli_to_nano) 933 934 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
936 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 937 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 938 return self.func("TO_GEOGRAPHY", expression.this) 939 if expression.is_type(exp.DataType.Type.GEOMETRY): 940 return self.func("TO_GEOMETRY", expression.this) 941 942 return super().cast_sql(expression, safe_prefix=safe_prefix)
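A sketch of the geospatial special case above:

import sqlglot

# Expected shape: SELECT TO_GEOGRAPHY(wkt)
print(sqlglot.transpile("SELECT CAST(wkt AS GEOGRAPHY)", read="snowflake", write="snowflake")[0])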
944 def trycast_sql(self, expression: exp.TryCast) -> str: 945 value = expression.this 946 947 if value.type is None: 948 from sqlglot.optimizer.annotate_types import annotate_types 949 950 value = annotate_types(value) 951 952 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 953 return super().trycast_sql(expression) 954 955 # TRY_CAST only works for string values in Snowflake 956 return self.cast_sql(expression)
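Since Snowflake's TRY_CAST only accepts string inputs, non-string operands are downgraded to a plain CAST; a sketch (the exact type spellings in the output are assumptions):

import sqlglot

# The string operand keeps TRY_CAST; the integer operand becomes CAST
print(sqlglot.transpile("SELECT TRY_CAST('10' AS INT), TRY_CAST(10 AS TEXT)", read="duckdb", write="snowflake")[0])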
964 def unnest_sql(self, expression: exp.Unnest) -> str: 965 unnest_alias = expression.args.get("alias") 966 offset = expression.args.get("offset") 967 968 columns = [ 969 exp.to_identifier("seq"), 970 exp.to_identifier("key"), 971 exp.to_identifier("path"), 972 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 973 seq_get(unnest_alias.columns if unnest_alias else [], 0) 974 or exp.to_identifier("value"), 975 exp.to_identifier("this"), 976 ] 977 978 if unnest_alias: 979 unnest_alias.set("columns", columns) 980 else: 981 unnest_alias = exp.TableAlias(this="_u", columns=columns) 982 983 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 984 alias = self.sql(unnest_alias) 985 alias = f" AS {alias}" if alias else "" 986 return f"{explode}{alias}"
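A sketch of the FLATTEN rewrite; the seq/key/path/index/value/this alias columns come from the fixed list above (output shape assumed):

import sqlglot

# UNNEST becomes TABLE(FLATTEN(INPUT => ...)) with the canonical column list
print(sqlglot.transpile(
    "SELECT value FROM UNNEST(ARRAY[1, 2, 3]) AS t(value)",
    read="postgres",
    write="snowflake",
)[0])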
988 def show_sql(self, expression: exp.Show) -> str: 989 terse = "TERSE " if expression.args.get("terse") else "" 990 history = " HISTORY" if expression.args.get("history") else "" 991 like = self.sql(expression, "like") 992 like = f" LIKE {like}" if like else "" 993 994 scope = self.sql(expression, "scope") 995 scope = f" {scope}" if scope else "" 996 997 scope_kind = self.sql(expression, "scope_kind") 998 if scope_kind: 999 scope_kind = f" IN {scope_kind}" 1000 1001 starts_with = self.sql(expression, "starts_with") 1002 if starts_with: 1003 starts_with = f" STARTS WITH {starts_with}" 1004 1005 limit = self.sql(expression, "limit") 1006 1007 from_ = self.sql(expression, "from") 1008 if from_: 1009 from_ = f" FROM {from_}" 1010 1011 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
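SHOW statements round-trip as first-class expressions rather than opaque commands; a sketch:

import sqlglot

show = sqlglot.parse_one(
    "SHOW TERSE TABLES IN SCHEMA db1.s1 STARTS WITH 'ord' LIMIT 10",
    read="snowflake",
)
print(show.sql(dialect="snowflake"))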
1013 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1014 # Other dialects don't support all of the following parameters, so we need to 1015 # generate default values as necessary to ensure the transpilation is correct 1016 group = expression.args.get("group") 1017 1018 # To avoid generating all these default values, we set group to None if 1019 # it's 0 (also default value) which doesn't trigger the following chain 1020 if group and group.name == "0": 1021 group = None 1022 1023 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 1024 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 1025 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 1026 1027 return self.func( 1028 "REGEXP_SUBSTR", 1029 expression.this, 1030 expression.expression, 1031 position, 1032 occurrence, 1033 parameters, 1034 group, 1035 )
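A sketch of the default-filling chain: passing only a capture group forces the earlier positional arguments to be materialized (expected output shape assumed):

import sqlglot

# Expected shape: SELECT REGEXP_SUBSTR(s, 'a(b)', 1, 1, 'c', 1)
print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b)', 1)", read="duckdb", write="snowflake")[0])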
1037 def describe_sql(self, expression: exp.Describe) -> str: 1038 # Default to table if kind is unknown 1039 kind_value = expression.args.get("kind") or "TABLE" 1040 kind = f" {kind_value}" if kind_value else "" 1041 this = f" {self.sql(expression, 'this')}" 1042 expressions = self.expressions(expression, flat=True) 1043 expressions = f" {expressions}" if expressions else "" 1044 return f"DESCRIBE{kind}{this}{expressions}"
1046 def generatedasidentitycolumnconstraint_sql( 1047 self, expression: exp.GeneratedAsIdentityColumnConstraint 1048 ) -> str: 1049 start = expression.args.get("start") 1050 start = f" START {start}" if start else "" 1051 increment = expression.args.get("increment") 1052 increment = f" INCREMENT {increment}" if increment else "" 1053 return f"AUTOINCREMENT{start}{increment}"
1058 def struct_sql(self, expression: exp.Struct) -> str: 1059 keys = [] 1060 values = [] 1061 1062 for i, e in enumerate(expression.expressions): 1063 if isinstance(e, exp.PropertyEQ): 1064 keys.append( 1065 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1066 ) 1067 values.append(e.expression) 1068 else: 1069 keys.append(exp.Literal.string(f"_{i}")) 1070 values.append(e) 1071 1072 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
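A sketch of the struct rewrite; named fields become key/value pairs, and unnamed fields would get positional "_<i>" keys:

import sqlglot

# Expected shape: SELECT OBJECT_CONSTRUCT('a', 1, 'b', 2)
print(sqlglot.transpile("SELECT STRUCT(1 AS a, 2 AS b)", read="bigquery", write="snowflake")[0])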
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
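A round-trip sketch for the TAG branch, assuming the Snowflake parser accepts this ALTER ... SET form (output is illustrative):

import sqlglot

sql = "ALTER TABLE t SET TAG department = 'finance'"
print(sqlglot.parse_one(sql, read="snowflake").sql(dialect="snowflake"))
# -> roughly: ALTER TABLE t SET TAG department='finance'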
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql