sqlglot.dialects.presto
```python
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_safe_divide_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    struct_extract_sql,
    str_position_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns

DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type, e.g.: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    class Tokenizer(tokens.Tokenizer):
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.JSONExtract: lambda self, e: self.jsonextract_sql(e),
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter", "and", "as", "between", "by", "case", "cast", "constraint",
            "create", "cross", "current_time", "current_timestamp", "deallocate",
            "delete", "describe", "distinct", "drop", "else", "end", "escape",
            "except", "execute", "exists", "extract", "false", "for", "from",
            "full", "group", "having", "in", "inner", "insert", "intersect",
            "into", "is", "join", "left", "like", "natural", "not", "null",
            "on", "or", "order", "outer", "prepare", "right", "select", "table",
            "then", "true", "union", "using", "values", "when", "where", "with",
        }

        def md5_sql(self, expression: exp.MD5) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
            `(cola)` is the expression), so we need to remove them.
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc. to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
```
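To see how these definitions behave end to end, here is a small sketch using sqlglot's top-level API. The table and column names (`t`, `x`, `a`, `b`) are illustrative, and the outputs shown in comments are indicative; exact formatting can vary between sqlglot versions.

```python
import sqlglot

# Presto has no ILIKE, so no_ilike_sql lowers the matched side and emits LIKE.
print(sqlglot.transpile("SELECT * FROM t WHERE x ILIKE '%a%'", read="duckdb", write="presto")[0])
# Indicative output: SELECT * FROM t WHERE LOWER(x) LIKE '%a%'

# exp.Levenshtein is renamed via rename_func("LEVENSHTEIN_DISTANCE") in TRANSFORMS.
print(sqlglot.transpile("SELECT LEVENSHTEIN(a, b) FROM t", write="presto")[0])
# Indicative output: SELECT LEVENSHTEIN_DISTANCE(a, b) FROM t
```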
class Presto(sqlglot.dialects.dialect.Dialect):
NULL_ORDERING = "nulls_are_last"
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".

TYPED_DIVISION = True
Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division; True means a / b is integer division if both a and b are integers.

LOG_BASE_FIRST: t.Optional[bool] = None
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).

TIME_MAPPING = MySQL.TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.

NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.
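These settings are plain class attributes, so the quickest way to confirm what a Presto dialect instance will do is to read them off directly; a minimal sketch:

```python
from sqlglot.dialects.presto import Presto

presto = Presto()
print(presto.INDEX_OFFSET)    # 1: Presto arrays are 1-indexed
print(presto.NULL_ORDERING)   # 'nulls_are_last'
print(presto.TYPED_DIVISION)  # True: a / b follows the operand types
print(presto.LOG_BASE_FIRST)  # None: two-argument LOG is not supported
```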
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
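Several of the inherited members above are the dialect's public entry points. A hedged round-trip sketch (the table name `t` and column `arr` are illustrative):

```python
from sqlglot.dialects.dialect import Dialect

# get_or_raise resolves a dialect name to a Dialect instance.
presto = Dialect.get_or_raise("presto")

# parse and generate run SQL through this dialect's Tokenizer, Parser and Generator.
ast = presto.parse("SELECT CARDINALITY(arr) FROM t")[0]
print(presto.generate(ast))  # Indicative output: SELECT CARDINALITY(arr) FROM t
```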
class Presto.Tokenizer(sqlglot.tokens.Tokenizer):
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
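The inherited tokenize method applies the KEYWORDS overrides shown in the class source above; for example, Presto maps "START" to a BEGIN token. A minimal sketch:

```python
from sqlglot.dialects.presto import Presto

# "START" tokenizes as BEGIN, per the "START": TokenType.BEGIN entry in KEYWORDS.
tokens = Presto().tokenize("START")
print(tokens[0].token_type)  # TokenType.BEGIN
```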
class Presto.Parser(sqlglot.parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
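The FUNCTIONS overrides in the class source above determine which expression node each Presto function parses into. A small sketch (the identifiers `name`, `arr` and `t` are illustrative):

```python
from sqlglot import exp, parse_one

select = parse_one("SELECT STRPOS(name, 'a'), ELEMENT_AT(arr, 1) FROM t", read="presto")

# STRPOS(haystack, needle) parses into exp.StrPosition, per FUNCTIONS above.
assert select.find(exp.StrPosition) is not None

# ELEMENT_AT(arr, 1) parses into a safe, 1-indexed exp.Bracket.
bracket = select.find(exp.Bracket)
assert bracket is not None
print(bracket.args["safe"], bracket.args["offset"])  # True 1
```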
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = False

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMETZ: "TIME",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
        exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.GenerateDateArray: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.JSONExtract: lambda self, e: self.jsonextract_sql(e),
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
        `(cola)` is the expression), so we need to remove them.
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)

    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

        # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
        # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
        if not expression.args.get("variant_extract") or is_json_extract:
            return self.func(
                "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
            )

        this = self.sql(expression, "this")

        # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
        segments = []
        for path_key in expression.expression.expressions[1:]:
            if not isinstance(path_key, exp.JSONPathKey):
                # Cannot transpile subscripts, wildcards etc. to dot notation
                self.unsupported(
                    f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                )
                continue
            key = path_key.this
            if not exp.SAFE_IDENTIFIER_RE.match(key):
                key = f'"{key}"'
            segments.append(f".{key}")

        expr = "".join(segments)

        return f"{this}{expr}"

    def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
        return self.func(
            "ARRAY_JOIN",
            self.func("ARRAY_AGG", expression.this),
            expression.args.get("separator"),
        )
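The practical effect of the TYPE_MAPPING and TRANSFORMS tables above is easiest to see end to end. A minimal sketch (the input queries and names are illustrative, and exact whitespace of the output may vary between sqlglot versions):

    import sqlglot

    # exp.Levenshtein is renamed per the TRANSFORMS table above.
    print(sqlglot.transpile("SELECT LEVENSHTEIN('kitten', 'sitting')", write="presto")[0])
    # e.g. SELECT LEVENSHTEIN_DISTANCE('kitten', 'sitting')

    # DuckDB's list literal becomes a Presto ARRAY[...] constructor, and
    # exp.ArraySize is rendered as CARDINALITY.
    print(sqlglot.transpile("SELECT ARRAY_LENGTH([1, 2, 3])", read="duckdb", write="presto")[0])
    # e.g. SELECT CARDINALITY(ARRAY[1, 2, 3])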
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
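A quick illustration of these options (hypothetical table and column names; the pretty-printed layout is approximate):

    import sqlglot

    ast = sqlglot.parse_one("SELECT a, b FROM t WHERE a > 1")

    # identify=True quotes every identifier; pretty=True applies the pad/indent
    # settings documented above.
    print(ast.sql(dialect="presto", identify=True, pretty=True))
    # e.g.
    # SELECT
    #   "a",
    #   "b"
    # FROM "t"
    # WHERE
    #   "a" > 1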
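The md5_sql override shown in the class body compensates for Presto's MD5, which operates on VARBINARY rather than returning a hex string. A hedged sketch of the expected rendering:

    import sqlglot

    # String arguments are wrapped in TO_UTF8 (via exp.Encode), then the digest
    # is hex-encoded and lowercased to match string-returning MD5 implementations.
    print(sqlglot.transpile("SELECT MD5('abc')", write="presto")[0])
    # e.g. SELECT LOWER(TO_HEX(MD5(TO_UTF8('abc'))))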
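The TRY/COALESCE fallback built by strtounix_sql renders along these lines (the format string is an assumption for illustration; note the Joda-style format in the PARSE_DATETIME branch, produced by Hive's inverse time mapping):

    import sqlglot

    print(sqlglot.transpile("SELECT STR_TO_UNIX(x, '%Y-%m-%d')", write="presto")[0])
    # e.g. SELECT TO_UNIXTIME(COALESCE(
    #   TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d')),
    #   PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d'), 'yyyy-MM-dd')))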
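bracket_sql routes "safe" subscripts to ELEMENT_AT and shifts integer indices to Presto's 1-based positions via apply_index_offset. A minimal sketch that builds the node by hand (the constructor arguments mirror exp.Bracket's arg names as used in the method above):

    from sqlglot import exp

    bracket = exp.Bracket(
        this=exp.column("arr"),
        expressions=[exp.Literal.number(0)],
        safe=True,
        offset=0,  # the source dialect indexes from 0
    )
    print(bracket.sql(dialect="presto"))
    # e.g. ELEMENT_AT(arr, 1)  -- the literal index is shifted by 1 - offset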
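When every key-value pair in a struct annotates to a concrete type, struct_sql can emit the typed CAST(ROW(...) AS ROW(...)) form; otherwise it falls back to a bare ROW(...). A sketch using a DuckDB struct literal as input (output approximate):

    import sqlglot

    # Both values annotate cleanly, so a typed ROW cast can be produced.
    print(sqlglot.transpile("SELECT {'a': 1, 'b': 'x'}", read="duckdb", write="presto")[0])
    # e.g. SELECT CAST(ROW(1, 'x') AS ROW(a INTEGER, b VARCHAR))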
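As the create_sql docstring above notes, a column list attached to CREATE VIEW is dropped on the way to Presto. A sketch with hypothetical names:

    import sqlglot

    # The (cola, colb) schema on the view is removed for Presto.
    print(sqlglot.transpile("CREATE VIEW v (cola, colb) AS SELECT 1, 2", write="presto")[0])
    # e.g. CREATE VIEW v AS SELECT 1, 2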
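Similarly, delete_sql strips the table alias and unqualifies columns, since Presto's DELETE grammar is just DELETE FROM tbl WHERE .... A sketch with hypothetical names:

    import sqlglot

    print(sqlglot.transpile("DELETE FROM users AS u WHERE u.id > 10", write="presto")[0])
    # e.g. DELETE FROM users WHERE id > 10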
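The variant_extract_is_json_extract setting read in jsonextract_sql can be toggled on the dialect instance. A hedged sketch (it assumes Dialect accepts settings as keyword arguments and that Snowflake's col:x.y parses with variant_extract set; outputs approximate):

    import sqlglot
    from sqlglot.dialects.presto import Presto

    sql = "SELECT col:x.y FROM t"

    # Default: keep JSON semantics.
    print(sqlglot.transpile(sql, read="snowflake", write="presto")[0])
    # e.g. SELECT JSON_EXTRACT(col, '$.x.y') FROM t

    # Opt into dot-notation ROW access instead.
    presto = Presto(variant_extract_is_json_extract=False)
    print(sqlglot.transpile(sql, read="snowflake", write=presto)[0])
    # e.g. SELECT col.x.y FROM t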
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql