sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract,
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake,
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it, e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )
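End-to-end, the dialect is exercised through sqlglot's top-level API rather than by calling these hooks directly. A minimal sketch follows; the first expected output matches the example in sqlglot's README, while the second is indicative only and the exact rendering may differ between sqlglot versions:

import sqlglot

# EPOCH_MS(...) is parsed into exp.UnixToTime with millisecond scale (see the
# FUNCTIONS mapping above); the target dialect then decides how to render it.
print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="hive")[0])
# -> SELECT FROM_UNIXTIME(1618088028295 / POW(10, 3))

# In the other direction, _date_diff_sql renders exp.DateDiff with the unit
# first and, per BigQuery's flipped semantics, the operands swapped.
print(sqlglot.transpile("SELECT DATE_DIFF(d1, d2, DAY)", read="bigquery", write="duckdb")[0])
# -> something like SELECT DATE_DIFF('DAY', d2, d1) (indicative)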
261class DuckDB(Dialect): 262 NULL_ORDERING = "nulls_are_last" 263 SUPPORTS_USER_DEFINED_TYPES = False 264 SAFE_DIVISION = True 265 INDEX_OFFSET = 1 266 CONCAT_COALESCE = True 267 SUPPORTS_ORDER_BY_ALL = True 268 SUPPORTS_FIXED_SIZE_ARRAYS = True 269 STRICT_JSON_PATH_SYNTAX = False 270 271 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 272 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 273 274 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 275 if isinstance(path, exp.Literal): 276 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 277 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 278 # This check ensures we'll avoid trying to parse these as JSON paths, which can 279 # either result in a noisy warning or in an invalid representation of the path. 280 path_text = path.name 281 if path_text.startswith("/") or "[#" in path_text: 282 return path 283 284 return super().to_json_path(path) 285 286 class Tokenizer(tokens.Tokenizer): 287 HEREDOC_STRINGS = ["$"] 288 289 HEREDOC_TAG_IS_IDENTIFIER = True 290 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 291 292 KEYWORDS = { 293 **tokens.Tokenizer.KEYWORDS, 294 "//": TokenType.DIV, 295 "**": TokenType.DSTAR, 296 "^@": TokenType.CARET_AT, 297 "@>": TokenType.AT_GT, 298 "<@": TokenType.LT_AT, 299 "ATTACH": TokenType.COMMAND, 300 "BINARY": TokenType.VARBINARY, 301 "BITSTRING": TokenType.BIT, 302 "BPCHAR": TokenType.TEXT, 303 "CHAR": TokenType.TEXT, 304 "CHARACTER VARYING": TokenType.TEXT, 305 "EXCLUDE": TokenType.EXCEPT, 306 "LOGICAL": TokenType.BOOLEAN, 307 "ONLY": TokenType.ONLY, 308 "PIVOT_WIDER": TokenType.PIVOT, 309 "POSITIONAL": TokenType.POSITIONAL, 310 "SIGNED": TokenType.INT, 311 "STRING": TokenType.TEXT, 312 "SUMMARIZE": TokenType.SUMMARIZE, 313 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 314 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 315 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 316 "TIMESTAMP_US": TokenType.TIMESTAMP, 317 "UBIGINT": TokenType.UBIGINT, 318 "UINTEGER": TokenType.UINT, 319 "USMALLINT": TokenType.USMALLINT, 320 "UTINYINT": TokenType.UTINYINT, 321 "VARCHAR": TokenType.TEXT, 322 } 323 KEYWORDS.pop("/*+") 324 325 SINGLE_TOKENS = { 326 **tokens.Tokenizer.SINGLE_TOKENS, 327 "$": TokenType.PARAMETER, 328 } 329 330 class Parser(parser.Parser): 331 BITWISE = { 332 **parser.Parser.BITWISE, 333 TokenType.TILDA: exp.RegexpLike, 334 } 335 BITWISE.pop(TokenType.CARET) 336 337 RANGE_PARSERS = { 338 **parser.Parser.RANGE_PARSERS, 339 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 340 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 341 } 342 343 EXPONENT = { 344 **parser.Parser.EXPONENT, 345 TokenType.CARET: exp.Pow, 346 TokenType.DSTAR: exp.Pow, 347 } 348 349 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 350 351 FUNCTIONS = { 352 **parser.Parser.FUNCTIONS, 353 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 354 "ARRAY_SORT": exp.SortArray.from_arg_list, 355 "DATEDIFF": _build_date_diff, 356 "DATE_DIFF": _build_date_diff, 357 "DATE_TRUNC": date_trunc_to_time, 358 "DATETRUNC": date_trunc_to_time, 359 "DECODE": lambda args: exp.Decode( 360 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 361 ), 362 "ENCODE": lambda args: exp.Encode( 363 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 364 ), 365 "EPOCH": exp.TimeToUnix.from_arg_list, 366 "EPOCH_MS": lambda args: exp.UnixToTime( 367 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 368 
), 369 "JSON": exp.ParseJSON.from_arg_list, 370 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 371 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 372 "LIST_HAS": exp.ArrayContains.from_arg_list, 373 "LIST_REVERSE_SORT": _build_sort_array_desc, 374 "LIST_SORT": exp.SortArray.from_arg_list, 375 "LIST_VALUE": lambda args: exp.Array(expressions=args), 376 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 377 "MAKE_TIMESTAMP": _build_make_timestamp, 378 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 379 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 380 "REGEXP_EXTRACT": build_regexp_extract, 381 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 382 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 383 this=seq_get(args, 0), 384 expression=seq_get(args, 1), 385 replacement=seq_get(args, 2), 386 modifiers=seq_get(args, 3), 387 ), 388 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 389 "STRING_SPLIT": exp.Split.from_arg_list, 390 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 391 "STRING_TO_ARRAY": exp.Split.from_arg_list, 392 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 393 "STRUCT_PACK": exp.Struct.from_arg_list, 394 "STR_SPLIT": exp.Split.from_arg_list, 395 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 396 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 397 "UNNEST": exp.Explode.from_arg_list, 398 "XOR": binary_from_function(exp.BitwiseXor), 399 "GENERATE_SERIES": _build_generate_series(), 400 "RANGE": _build_generate_series(end_exclusive=True), 401 } 402 403 FUNCTIONS.pop("DATE_SUB") 404 FUNCTIONS.pop("GLOB") 405 406 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 407 FUNCTION_PARSERS.pop("DECODE") 408 409 NO_PAREN_FUNCTION_PARSERS = { 410 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 411 "MAP": lambda self: self._parse_map(), 412 } 413 414 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 415 TokenType.SEMI, 416 TokenType.ANTI, 417 } 418 419 PLACEHOLDER_PARSERS = { 420 **parser.Parser.PLACEHOLDER_PARSERS, 421 TokenType.PARAMETER: lambda self: ( 422 self.expression(exp.Placeholder, this=self._prev.text) 423 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 424 else None 425 ), 426 } 427 428 TYPE_CONVERTERS = { 429 # https://duckdb.org/docs/sql/data_types/numeric 430 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 431 # https://duckdb.org/docs/sql/data_types/text 432 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 433 } 434 435 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 436 # https://duckdb.org/docs/sql/samples.html 437 sample = super()._parse_table_sample(as_modifier=as_modifier) 438 if sample and not sample.args.get("method"): 439 if sample.args.get("size"): 440 sample.set("method", exp.var("RESERVOIR")) 441 else: 442 sample.set("method", exp.var("SYSTEM")) 443 444 return sample 445 446 def _parse_bracket( 447 self, this: t.Optional[exp.Expression] = None 448 ) -> t.Optional[exp.Expression]: 449 bracket = super()._parse_bracket(this) 450 if isinstance(bracket, exp.Bracket): 451 bracket.set("returns_list_for_maps", True) 452 453 return bracket 454 455 def _parse_map(self) -> exp.ToMap | exp.Map: 456 if self._match(TokenType.L_BRACE, advance=False): 457 return self.expression(exp.ToMap, this=self._parse_bracket()) 458 459 args = self._parse_wrapped_csv(self._parse_assignment) 460 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 461 462 
def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 463 return self._parse_field_def() 464 465 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 466 if len(aggregations) == 1: 467 return super()._pivot_column_names(aggregations) 468 return pivot_column_names(aggregations, dialect="duckdb") 469 470 class Generator(generator.Generator): 471 PARAMETER_TOKEN = "$" 472 NAMED_PLACEHOLDER_TOKEN = "$" 473 JOIN_HINTS = False 474 TABLE_HINTS = False 475 QUERY_HINTS = False 476 LIMIT_FETCH = "LIMIT" 477 STRUCT_DELIMITER = ("(", ")") 478 RENAME_TABLE_WITH_DB = False 479 NVL2_SUPPORTED = False 480 SEMI_ANTI_JOIN_WITH_SIDE = False 481 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 482 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 483 LAST_DAY_SUPPORTS_DATE_PART = False 484 JSON_KEY_VALUE_PAIR_SEP = "," 485 IGNORE_NULLS_IN_FUNC = True 486 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 487 SUPPORTS_CREATE_TABLE_LIKE = False 488 MULTI_ARG_DISTINCT = False 489 CAN_IMPLEMENT_ARRAY_ANY = True 490 SUPPORTS_TO_NUMBER = False 491 COPY_HAS_INTO_KEYWORD = False 492 STAR_EXCEPT = "EXCLUDE" 493 PAD_FILL_PATTERN_IS_REQUIRED = True 494 ARRAY_CONCAT_IS_VAR_LEN = False 495 496 TRANSFORMS = { 497 **generator.Generator.TRANSFORMS, 498 exp.ApproxDistinct: approx_count_distinct_sql, 499 exp.Array: inline_array_unless_query, 500 exp.ArrayFilter: rename_func("LIST_FILTER"), 501 exp.ArraySize: rename_func("ARRAY_LENGTH"), 502 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 503 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 504 exp.ArraySort: _array_sort_sql, 505 exp.ArraySum: rename_func("LIST_SUM"), 506 exp.BitwiseXor: rename_func("XOR"), 507 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 508 exp.CurrentDate: lambda *_: "CURRENT_DATE", 509 exp.CurrentTime: lambda *_: "CURRENT_TIME", 510 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 511 exp.DayOfMonth: rename_func("DAYOFMONTH"), 512 exp.DayOfWeek: rename_func("DAYOFWEEK"), 513 exp.DayOfWeekIso: rename_func("ISODOW"), 514 exp.DayOfYear: rename_func("DAYOFYEAR"), 515 exp.DataType: _datatype_sql, 516 exp.Date: _date_sql, 517 exp.DateAdd: _date_delta_sql, 518 exp.DateFromParts: rename_func("MAKE_DATE"), 519 exp.DateSub: _date_delta_sql, 520 exp.DateDiff: _date_diff_sql, 521 exp.DateStrToDate: datestrtodate_sql, 522 exp.Datetime: no_datetime_sql, 523 exp.DatetimeSub: _date_delta_sql, 524 exp.DatetimeAdd: _date_delta_sql, 525 exp.DateToDi: lambda self, 526 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 527 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 528 exp.DiToDate: lambda self, 529 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 530 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 531 exp.GenerateDateArray: _generate_datetime_array_sql, 532 exp.GenerateTimestampArray: _generate_datetime_array_sql, 533 exp.Explode: rename_func("UNNEST"), 534 exp.IntDiv: lambda self, e: self.binary(e, "//"), 535 exp.IsInf: rename_func("ISINF"), 536 exp.IsNan: rename_func("ISNAN"), 537 exp.JSONBExists: rename_func("JSON_EXISTS"), 538 exp.JSONExtract: _arrow_json_extract_sql, 539 exp.JSONExtractScalar: _arrow_json_extract_sql, 540 exp.JSONFormat: _json_format_sql, 541 exp.Lateral: explode_to_unnest_sql, 542 exp.LogicalOr: rename_func("BOOL_OR"), 543 exp.LogicalAnd: rename_func("BOOL_AND"), 544 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 545 
exp.MonthsBetween: lambda self, e: self.func( 546 "DATEDIFF", 547 "'month'", 548 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 549 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 550 ), 551 exp.PercentileCont: rename_func("QUANTILE_CONT"), 552 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 553 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 554 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 555 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 556 exp.RegexpReplace: lambda self, e: self.func( 557 "REGEXP_REPLACE", 558 e.this, 559 e.expression, 560 e.args.get("replacement"), 561 e.args.get("modifiers"), 562 ), 563 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 564 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 565 exp.Return: lambda self, e: self.sql(e, "this"), 566 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 567 exp.Rand: rename_func("RANDOM"), 568 exp.SafeDivide: no_safe_divide_sql, 569 exp.SHA: rename_func("SHA1"), 570 exp.SHA2: sha256_sql, 571 exp.Split: rename_func("STR_SPLIT"), 572 exp.SortArray: _sort_array_sql, 573 exp.StrPosition: str_position_sql, 574 exp.StrToUnix: lambda self, e: self.func( 575 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 576 ), 577 exp.Struct: _struct_sql, 578 exp.Transform: rename_func("LIST_TRANSFORM"), 579 exp.TimeAdd: _date_delta_sql, 580 exp.Time: no_time_sql, 581 exp.TimeDiff: _timediff_sql, 582 exp.Timestamp: no_timestamp_sql, 583 exp.TimestampDiff: lambda self, e: self.func( 584 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 585 ), 586 exp.TimestampTrunc: timestamptrunc_sql(), 587 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 588 exp.TimeStrToTime: timestrtotime_sql, 589 exp.TimeStrToUnix: lambda self, e: self.func( 590 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 591 ), 592 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 593 exp.TimeToUnix: rename_func("EPOCH"), 594 exp.TsOrDiToDi: lambda self, 595 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 596 exp.TsOrDsAdd: _date_delta_sql, 597 exp.TsOrDsDiff: lambda self, e: self.func( 598 "DATE_DIFF", 599 f"'{e.args.get('unit') or 'DAY'}'", 600 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 601 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 602 ), 603 exp.UnixToStr: lambda self, e: self.func( 604 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 605 ), 606 exp.DatetimeTrunc: lambda self, e: self.func( 607 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 608 ), 609 exp.UnixToTime: _unix_to_time_sql, 610 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 611 exp.VariancePop: rename_func("VAR_POP"), 612 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 613 exp.Xor: bool_xor_sql, 614 } 615 616 SUPPORTED_JSON_PATH_PARTS = { 617 exp.JSONPathKey, 618 exp.JSONPathRoot, 619 exp.JSONPathSubscript, 620 exp.JSONPathWildcard, 621 } 622 623 TYPE_MAPPING = { 624 **generator.Generator.TYPE_MAPPING, 625 exp.DataType.Type.BINARY: "BLOB", 626 exp.DataType.Type.BPCHAR: "TEXT", 627 exp.DataType.Type.CHAR: "TEXT", 628 exp.DataType.Type.FLOAT: "REAL", 629 exp.DataType.Type.NCHAR: "TEXT", 630 exp.DataType.Type.NVARCHAR: "TEXT", 631 exp.DataType.Type.UINT: "UINTEGER", 632 exp.DataType.Type.VARBINARY: 
"BLOB", 633 exp.DataType.Type.ROWVERSION: "BLOB", 634 exp.DataType.Type.VARCHAR: "TEXT", 635 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 636 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 637 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 638 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 639 } 640 641 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 642 RESERVED_KEYWORDS = { 643 "array", 644 "analyse", 645 "union", 646 "all", 647 "when", 648 "in_p", 649 "default", 650 "create_p", 651 "window", 652 "asymmetric", 653 "to", 654 "else", 655 "localtime", 656 "from", 657 "end_p", 658 "select", 659 "current_date", 660 "foreign", 661 "with", 662 "grant", 663 "session_user", 664 "or", 665 "except", 666 "references", 667 "fetch", 668 "limit", 669 "group_p", 670 "leading", 671 "into", 672 "collate", 673 "offset", 674 "do", 675 "then", 676 "localtimestamp", 677 "check_p", 678 "lateral_p", 679 "current_role", 680 "where", 681 "asc_p", 682 "placing", 683 "desc_p", 684 "user", 685 "unique", 686 "initially", 687 "column", 688 "both", 689 "some", 690 "as", 691 "any", 692 "only", 693 "deferrable", 694 "null_p", 695 "current_time", 696 "true_p", 697 "table", 698 "case", 699 "trailing", 700 "variadic", 701 "for", 702 "on", 703 "distinct", 704 "false_p", 705 "not", 706 "constraint", 707 "current_timestamp", 708 "returning", 709 "primary", 710 "intersect", 711 "having", 712 "analyze", 713 "current_user", 714 "and", 715 "cast", 716 "symmetric", 717 "using", 718 "order", 719 "current_catalog", 720 } 721 722 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 723 724 # DuckDB doesn't generally support CREATE TABLE .. properties 725 # https://duckdb.org/docs/sql/statements/create_table.html 726 PROPERTIES_LOCATION = { 727 prop: exp.Properties.Location.UNSUPPORTED 728 for prop in generator.Generator.PROPERTIES_LOCATION 729 } 730 731 # There are a few exceptions (e.g. 
temporary tables) which are supported or 732 # can be transpiled to DuckDB, so we explicitly override them accordingly 733 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 734 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 735 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 736 737 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 738 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 739 740 def strtotime_sql(self, expression: exp.StrToTime) -> str: 741 if expression.args.get("safe"): 742 formatted_time = self.format_time(expression) 743 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 744 return str_to_time_sql(self, expression) 745 746 def strtodate_sql(self, expression: exp.StrToDate) -> str: 747 if expression.args.get("safe"): 748 formatted_time = self.format_time(expression) 749 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 750 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 751 752 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 753 arg = expression.this 754 if expression.args.get("safe"): 755 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 756 return self.func("JSON", arg) 757 758 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 759 nano = expression.args.get("nano") 760 if nano is not None: 761 expression.set( 762 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 763 ) 764 765 return rename_func("MAKE_TIME")(self, expression) 766 767 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 768 sec = expression.args["sec"] 769 770 milli = expression.args.get("milli") 771 if milli is not None: 772 sec += milli.pop() / exp.Literal.number(1000.0) 773 774 nano = expression.args.get("nano") 775 if nano is not None: 776 sec += nano.pop() / exp.Literal.number(1000000000.0) 777 778 if milli or nano: 779 expression.set("sec", sec) 780 781 return rename_func("MAKE_TIMESTAMP")(self, expression) 782 783 def tablesample_sql( 784 self, 785 expression: exp.TableSample, 786 tablesample_keyword: t.Optional[str] = None, 787 ) -> str: 788 if not isinstance(expression.parent, exp.Select): 789 # This sample clause only applies to a single source, not the entire resulting relation 790 tablesample_keyword = "TABLESAMPLE" 791 792 if expression.args.get("size"): 793 method = expression.args.get("method") 794 if method and method.name.upper() != "RESERVOIR": 795 self.unsupported( 796 f"Sampling method {method} is not supported with a discrete sample count, " 797 "defaulting to reservoir sampling" 798 ) 799 expression.set("method", exp.var("RESERVOIR")) 800 801 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 802 803 def interval_sql(self, expression: exp.Interval) -> str: 804 multiplier: t.Optional[int] = None 805 unit = expression.text("unit").lower() 806 807 if unit.startswith("week"): 808 multiplier = 7 809 if unit.startswith("quarter"): 810 multiplier = 90 811 812 if multiplier: 813 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 814 815 return super().interval_sql(expression) 816 817 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 818 if isinstance(expression.parent, exp.UserDefinedFunction): 819 return self.sql(expression, "this") 820 
return super().columndef_sql(expression, sep) 821 822 def join_sql(self, expression: exp.Join) -> str: 823 if ( 824 expression.side == "LEFT" 825 and not expression.args.get("on") 826 and isinstance(expression.this, exp.Unnest) 827 ): 828 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 829 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 830 return super().join_sql(expression.on(exp.true())) 831 832 return super().join_sql(expression) 833 834 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 835 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 836 if expression.args.get("is_end_exclusive"): 837 return rename_func("RANGE")(self, expression) 838 839 return self.function_fallback_sql(expression) 840 841 def bracket_sql(self, expression: exp.Bracket) -> str: 842 this = expression.this 843 if isinstance(this, exp.Array): 844 this.replace(exp.paren(this)) 845 846 bracket = super().bracket_sql(expression) 847 848 if not expression.args.get("returns_list_for_maps"): 849 if not this.type: 850 from sqlglot.optimizer.annotate_types import annotate_types 851 852 this = annotate_types(this) 853 854 if this.is_type(exp.DataType.Type.MAP): 855 bracket = f"({bracket})[1]" 856 857 return bracket 858 859 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 860 expression_sql = self.sql(expression, "expression") 861 862 func = expression.this 863 if isinstance(func, exp.PERCENTILES): 864 # Make the order key the first arg and slide the fraction to the right 865 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 866 order_col = expression.find(exp.Ordered) 867 if order_col: 868 func.set("expression", func.this) 869 func.set("this", order_col.this) 870 871 this = self.sql(expression, "this").rstrip(")") 872 873 return f"{this}{expression_sql})" 874 875 def length_sql(self, expression: exp.Length) -> str: 876 arg = expression.this 877 878 # Dialects like BQ and Snowflake also accept binary values as args, so 879 # DDB will attempt to infer the type or resort to case/when resolution 880 if not expression.args.get("binary") or arg.is_string: 881 return self.func("LENGTH", arg) 882 883 if not arg.type: 884 from sqlglot.optimizer.annotate_types import annotate_types 885 886 arg = annotate_types(arg) 887 888 if arg.is_type(*exp.DataType.TEXT_TYPES): 889 return self.func("LENGTH", arg) 890 891 # We need these casts to make duckdb's static type checker happy 892 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 893 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 894 895 case = ( 896 exp.case(self.func("TYPEOF", arg)) 897 .when( 898 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 899 ) # anonymous to break length_sql recursion 900 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 901 ) 902 903 return self.sql(case) 904 905 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 906 this = expression.this 907 key = expression.args.get("key") 908 key_sql = key.name if isinstance(key, exp.Expression) else "" 909 value_sql = self.sql(expression, "value") 910 911 kv_sql = f"{key_sql} := {value_sql}" 912 913 # If the input struct is empty e.g. 
transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 914 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 915 if isinstance(this, exp.Struct) and not this.expressions: 916 return self.func("STRUCT_PACK", kv_sql) 917 918 return self.func("STRUCT_INSERT", this, kv_sql) 919 920 def unnest_sql(self, expression: exp.Unnest) -> str: 921 explode_array = expression.args.get("explode_array") 922 if explode_array: 923 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 924 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 925 expression.expressions.append( 926 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 927 ) 928 929 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 930 alias = expression.args.get("alias") 931 if alias: 932 expression.set("alias", None) 933 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 934 935 unnest_sql = super().unnest_sql(expression) 936 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 937 return self.sql(select) 938 939 return super().unnest_sql(expression) 940 941 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 942 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 943 # DuckDB should render IGNORE NULLS only for the general-purpose 944 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 945 return super().ignorenulls_sql(expression) 946 947 return self.sql(expression, "this") 948 949 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 950 this = self.sql(expression, "this") 951 null_text = self.sql(expression, "null") 952 953 if null_text: 954 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 955 956 return self.func("ARRAY_TO_STRING", this, expression.expression) 957 958 @unsupported_args("position", "occurrence") 959 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 960 group = expression.args.get("group") 961 params = expression.args.get("parameters") 962 963 # Do not render group if there is no following argument, 964 # and it's the default value for this dialect 965 if ( 966 not params 967 and group 968 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 969 ): 970 group = None 971 return self.func( 972 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 973 )
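As a quick illustration of the generator helpers above (a minimal sketch; the exact output string may vary across sqlglot versions), interval_sql rewrites WEEK and QUARTER intervals as day multiples, since it handles them as 7 and 90 days respectively:

import sqlglot

# WEEK/QUARTER units are rewritten as day multiples (7 and 90) by interval_sql
print(sqlglot.transpile("SELECT d + INTERVAL 2 WEEK", write="duckdb")[0])
# expected shape: SELECT d + (7 * INTERVAL '2' DAY)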
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator instead.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
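These behaviors are driven by class-level flags on the dialect. A minimal sketch for inspecting them; the attribute names (NULL_ORDERING, CONCAT_COALESCE, SUPPORTS_ORDER_BY_ALL, SUPPORTS_FIXED_SIZE_ARRAYS) are assumed from the descriptions above, so verify them against your sqlglot version:

from sqlglot.dialects.duckdb import DuckDB

# Assumed attribute names matching the documented flags above
print(DuckDB.NULL_ORDERING)               # default NULL ordering, e.g. "nulls_are_last"
print(DuckDB.CONCAT_COALESCE)             # whether CONCAT treats NULL args as empty strings
print(DuckDB.SUPPORTS_ORDER_BY_ALL)       # whether ORDER BY ALL expands to all selected columns
print(DuckDB.SUPPORTS_FIXED_SIZE_ARRAYS)  # whether x::INT[5] is a fixed-size array cast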
274 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 275 if isinstance(path, exp.Literal): 276 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 277 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 278 # This check ensures we'll avoid trying to parse these as JSON paths, which can 279 # either result in a noisy warning or in an invalid representation of the path. 280 path_text = path.name 281 if path_text.startswith("/") or "[#" in path_text: 282 return path 283 284 return super().to_json_path(path)
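A minimal sketch of this pass-through behavior: a DuckDB JSON pointer path (leading "/") should survive a round trip unchanged instead of being parsed as a JSONPath:

import sqlglot

# The pointer-style path is kept verbatim by to_json_path
expr = sqlglot.parse_one("SELECT x -> '/a/0' FROM t", read="duckdb")
print(expr.sql("duckdb"))  # expected shape: SELECT x -> '/a/0' FROM t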
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
286 class Tokenizer(tokens.Tokenizer): 287 HEREDOC_STRINGS = ["$"] 288 289 HEREDOC_TAG_IS_IDENTIFIER = True 290 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 291 292 KEYWORDS = { 293 **tokens.Tokenizer.KEYWORDS, 294 "//": TokenType.DIV, 295 "**": TokenType.DSTAR, 296 "^@": TokenType.CARET_AT, 297 "@>": TokenType.AT_GT, 298 "<@": TokenType.LT_AT, 299 "ATTACH": TokenType.COMMAND, 300 "BINARY": TokenType.VARBINARY, 301 "BITSTRING": TokenType.BIT, 302 "BPCHAR": TokenType.TEXT, 303 "CHAR": TokenType.TEXT, 304 "CHARACTER VARYING": TokenType.TEXT, 305 "EXCLUDE": TokenType.EXCEPT, 306 "LOGICAL": TokenType.BOOLEAN, 307 "ONLY": TokenType.ONLY, 308 "PIVOT_WIDER": TokenType.PIVOT, 309 "POSITIONAL": TokenType.POSITIONAL, 310 "SIGNED": TokenType.INT, 311 "STRING": TokenType.TEXT, 312 "SUMMARIZE": TokenType.SUMMARIZE, 313 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 314 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 315 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 316 "TIMESTAMP_US": TokenType.TIMESTAMP, 317 "UBIGINT": TokenType.UBIGINT, 318 "UINTEGER": TokenType.UINT, 319 "USMALLINT": TokenType.USMALLINT, 320 "UTINYINT": TokenType.UTINYINT, 321 "VARCHAR": TokenType.TEXT, 322 } 323 KEYWORDS.pop("/*+") 324 325 SINGLE_TOKENS = { 326 **tokens.Tokenizer.SINGLE_TOKENS, 327 "$": TokenType.PARAMETER, 328 }
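A short sketch exercising the keyword mappings above via the dialect-level tokenize helper: STRING is tokenized as TEXT and // as integer division (exact token names depend on the sqlglot version):

from sqlglot.dialects.duckdb import DuckDB

tokens = DuckDB().tokenize("SELECT CAST(1 AS STRING), 7 // 2")
print([(token.token_type, token.text) for token in tokens])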
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
330 class Parser(parser.Parser): 331 BITWISE = { 332 **parser.Parser.BITWISE, 333 TokenType.TILDA: exp.RegexpLike, 334 } 335 BITWISE.pop(TokenType.CARET) 336 337 RANGE_PARSERS = { 338 **parser.Parser.RANGE_PARSERS, 339 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 340 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 341 } 342 343 EXPONENT = { 344 **parser.Parser.EXPONENT, 345 TokenType.CARET: exp.Pow, 346 TokenType.DSTAR: exp.Pow, 347 } 348 349 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 350 351 FUNCTIONS = { 352 **parser.Parser.FUNCTIONS, 353 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 354 "ARRAY_SORT": exp.SortArray.from_arg_list, 355 "DATEDIFF": _build_date_diff, 356 "DATE_DIFF": _build_date_diff, 357 "DATE_TRUNC": date_trunc_to_time, 358 "DATETRUNC": date_trunc_to_time, 359 "DECODE": lambda args: exp.Decode( 360 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 361 ), 362 "ENCODE": lambda args: exp.Encode( 363 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 364 ), 365 "EPOCH": exp.TimeToUnix.from_arg_list, 366 "EPOCH_MS": lambda args: exp.UnixToTime( 367 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 368 ), 369 "JSON": exp.ParseJSON.from_arg_list, 370 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 371 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 372 "LIST_HAS": exp.ArrayContains.from_arg_list, 373 "LIST_REVERSE_SORT": _build_sort_array_desc, 374 "LIST_SORT": exp.SortArray.from_arg_list, 375 "LIST_VALUE": lambda args: exp.Array(expressions=args), 376 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 377 "MAKE_TIMESTAMP": _build_make_timestamp, 378 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 379 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 380 "REGEXP_EXTRACT": build_regexp_extract, 381 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 382 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 383 this=seq_get(args, 0), 384 expression=seq_get(args, 1), 385 replacement=seq_get(args, 2), 386 modifiers=seq_get(args, 3), 387 ), 388 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 389 "STRING_SPLIT": exp.Split.from_arg_list, 390 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 391 "STRING_TO_ARRAY": exp.Split.from_arg_list, 392 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 393 "STRUCT_PACK": exp.Struct.from_arg_list, 394 "STR_SPLIT": exp.Split.from_arg_list, 395 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 396 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 397 "UNNEST": exp.Explode.from_arg_list, 398 "XOR": binary_from_function(exp.BitwiseXor), 399 "GENERATE_SERIES": _build_generate_series(), 400 "RANGE": _build_generate_series(end_exclusive=True), 401 } 402 403 FUNCTIONS.pop("DATE_SUB") 404 FUNCTIONS.pop("GLOB") 405 406 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 407 FUNCTION_PARSERS.pop("DECODE") 408 409 NO_PAREN_FUNCTION_PARSERS = { 410 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 411 "MAP": lambda self: self._parse_map(), 412 } 413 414 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 415 TokenType.SEMI, 416 TokenType.ANTI, 417 } 418 419 PLACEHOLDER_PARSERS = { 420 **parser.Parser.PLACEHOLDER_PARSERS, 421 TokenType.PARAMETER: lambda self: ( 422 self.expression(exp.Placeholder, this=self._prev.text) 423 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 424 else None 425 ), 426 } 427 428 TYPE_CONVERTERS = { 429 # https://duckdb.org/docs/sql/data_types/numeric 430 
exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 431 # https://duckdb.org/docs/sql/data_types/text 432 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 433 } 434 435 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 436 # https://duckdb.org/docs/sql/samples.html 437 sample = super()._parse_table_sample(as_modifier=as_modifier) 438 if sample and not sample.args.get("method"): 439 if sample.args.get("size"): 440 sample.set("method", exp.var("RESERVOIR")) 441 else: 442 sample.set("method", exp.var("SYSTEM")) 443 444 return sample 445 446 def _parse_bracket( 447 self, this: t.Optional[exp.Expression] = None 448 ) -> t.Optional[exp.Expression]: 449 bracket = super()._parse_bracket(this) 450 if isinstance(bracket, exp.Bracket): 451 bracket.set("returns_list_for_maps", True) 452 453 return bracket 454 455 def _parse_map(self) -> exp.ToMap | exp.Map: 456 if self._match(TokenType.L_BRACE, advance=False): 457 return self.expression(exp.ToMap, this=self._parse_bracket()) 458 459 args = self._parse_wrapped_csv(self._parse_assignment) 460 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 461 462 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 463 return self._parse_field_def() 464 465 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 466 if len(aggregations) == 1: 467 return super()._pivot_column_names(aggregations) 468 return pivot_column_names(aggregations, dialect="duckdb")
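A minimal sketch of the operator overrides above: ** is parsed as exponentiation (exp.Pow) and ~ as a regular-expression match (exp.RegexpLike):

import sqlglot

pow_expr = sqlglot.parse_one("SELECT 2 ** 3", read="duckdb").selects[0]
print(type(pow_expr))  # expected: <class 'sqlglot.expressions.Pow'>

regex = sqlglot.parse_one("SELECT 'ab' ~ 'a.'", read="duckdb").selects[0]
print(type(regex))  # expected: <class 'sqlglot.expressions.RegexpLike'>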
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
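A hedged sketch of these options, assuming they are forwarded from the top-level parse_one API down to the Parser constructor, and that ParseError exposes the collected errors:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # error_level=RAISE collects up to max_errors messages before raising
    sqlglot.parse_one("SELECT 1 +", read="duckdb", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e.errors)  # structured error details (assumed attribute)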
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
470 class Generator(generator.Generator): 471 PARAMETER_TOKEN = "$" 472 NAMED_PLACEHOLDER_TOKEN = "$" 473 JOIN_HINTS = False 474 TABLE_HINTS = False 475 QUERY_HINTS = False 476 LIMIT_FETCH = "LIMIT" 477 STRUCT_DELIMITER = ("(", ")") 478 RENAME_TABLE_WITH_DB = False 479 NVL2_SUPPORTED = False 480 SEMI_ANTI_JOIN_WITH_SIDE = False 481 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 482 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 483 LAST_DAY_SUPPORTS_DATE_PART = False 484 JSON_KEY_VALUE_PAIR_SEP = "," 485 IGNORE_NULLS_IN_FUNC = True 486 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 487 SUPPORTS_CREATE_TABLE_LIKE = False 488 MULTI_ARG_DISTINCT = False 489 CAN_IMPLEMENT_ARRAY_ANY = True 490 SUPPORTS_TO_NUMBER = False 491 COPY_HAS_INTO_KEYWORD = False 492 STAR_EXCEPT = "EXCLUDE" 493 PAD_FILL_PATTERN_IS_REQUIRED = True 494 ARRAY_CONCAT_IS_VAR_LEN = False 495 496 TRANSFORMS = { 497 **generator.Generator.TRANSFORMS, 498 exp.ApproxDistinct: approx_count_distinct_sql, 499 exp.Array: inline_array_unless_query, 500 exp.ArrayFilter: rename_func("LIST_FILTER"), 501 exp.ArraySize: rename_func("ARRAY_LENGTH"), 502 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 503 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 504 exp.ArraySort: _array_sort_sql, 505 exp.ArraySum: rename_func("LIST_SUM"), 506 exp.BitwiseXor: rename_func("XOR"), 507 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 508 exp.CurrentDate: lambda *_: "CURRENT_DATE", 509 exp.CurrentTime: lambda *_: "CURRENT_TIME", 510 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 511 exp.DayOfMonth: rename_func("DAYOFMONTH"), 512 exp.DayOfWeek: rename_func("DAYOFWEEK"), 513 exp.DayOfWeekIso: rename_func("ISODOW"), 514 exp.DayOfYear: rename_func("DAYOFYEAR"), 515 exp.DataType: _datatype_sql, 516 exp.Date: _date_sql, 517 exp.DateAdd: _date_delta_sql, 518 exp.DateFromParts: rename_func("MAKE_DATE"), 519 exp.DateSub: _date_delta_sql, 520 exp.DateDiff: _date_diff_sql, 521 exp.DateStrToDate: datestrtodate_sql, 522 exp.Datetime: no_datetime_sql, 523 exp.DatetimeSub: _date_delta_sql, 524 exp.DatetimeAdd: _date_delta_sql, 525 exp.DateToDi: lambda self, 526 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 527 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 528 exp.DiToDate: lambda self, 529 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 530 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 531 exp.GenerateDateArray: _generate_datetime_array_sql, 532 exp.GenerateTimestampArray: _generate_datetime_array_sql, 533 exp.Explode: rename_func("UNNEST"), 534 exp.IntDiv: lambda self, e: self.binary(e, "//"), 535 exp.IsInf: rename_func("ISINF"), 536 exp.IsNan: rename_func("ISNAN"), 537 exp.JSONBExists: rename_func("JSON_EXISTS"), 538 exp.JSONExtract: _arrow_json_extract_sql, 539 exp.JSONExtractScalar: _arrow_json_extract_sql, 540 exp.JSONFormat: _json_format_sql, 541 exp.Lateral: explode_to_unnest_sql, 542 exp.LogicalOr: rename_func("BOOL_OR"), 543 exp.LogicalAnd: rename_func("BOOL_AND"), 544 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 545 exp.MonthsBetween: lambda self, e: self.func( 546 "DATEDIFF", 547 "'month'", 548 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 549 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 550 ), 551 exp.PercentileCont: rename_func("QUANTILE_CONT"), 552 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 553 # DuckDB doesn't allow qualified columns inside of 
PIVOT expressions. 554 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 555 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 556 exp.RegexpReplace: lambda self, e: self.func( 557 "REGEXP_REPLACE", 558 e.this, 559 e.expression, 560 e.args.get("replacement"), 561 e.args.get("modifiers"), 562 ), 563 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 564 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 565 exp.Return: lambda self, e: self.sql(e, "this"), 566 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 567 exp.Rand: rename_func("RANDOM"), 568 exp.SafeDivide: no_safe_divide_sql, 569 exp.SHA: rename_func("SHA1"), 570 exp.SHA2: sha256_sql, 571 exp.Split: rename_func("STR_SPLIT"), 572 exp.SortArray: _sort_array_sql, 573 exp.StrPosition: str_position_sql, 574 exp.StrToUnix: lambda self, e: self.func( 575 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 576 ), 577 exp.Struct: _struct_sql, 578 exp.Transform: rename_func("LIST_TRANSFORM"), 579 exp.TimeAdd: _date_delta_sql, 580 exp.Time: no_time_sql, 581 exp.TimeDiff: _timediff_sql, 582 exp.Timestamp: no_timestamp_sql, 583 exp.TimestampDiff: lambda self, e: self.func( 584 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 585 ), 586 exp.TimestampTrunc: timestamptrunc_sql(), 587 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 588 exp.TimeStrToTime: timestrtotime_sql, 589 exp.TimeStrToUnix: lambda self, e: self.func( 590 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 591 ), 592 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 593 exp.TimeToUnix: rename_func("EPOCH"), 594 exp.TsOrDiToDi: lambda self, 595 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 596 exp.TsOrDsAdd: _date_delta_sql, 597 exp.TsOrDsDiff: lambda self, e: self.func( 598 "DATE_DIFF", 599 f"'{e.args.get('unit') or 'DAY'}'", 600 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 601 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 602 ), 603 exp.UnixToStr: lambda self, e: self.func( 604 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 605 ), 606 exp.DatetimeTrunc: lambda self, e: self.func( 607 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 608 ), 609 exp.UnixToTime: _unix_to_time_sql, 610 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 611 exp.VariancePop: rename_func("VAR_POP"), 612 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 613 exp.Xor: bool_xor_sql, 614 } 615 616 SUPPORTED_JSON_PATH_PARTS = { 617 exp.JSONPathKey, 618 exp.JSONPathRoot, 619 exp.JSONPathSubscript, 620 exp.JSONPathWildcard, 621 } 622 623 TYPE_MAPPING = { 624 **generator.Generator.TYPE_MAPPING, 625 exp.DataType.Type.BINARY: "BLOB", 626 exp.DataType.Type.BPCHAR: "TEXT", 627 exp.DataType.Type.CHAR: "TEXT", 628 exp.DataType.Type.FLOAT: "REAL", 629 exp.DataType.Type.NCHAR: "TEXT", 630 exp.DataType.Type.NVARCHAR: "TEXT", 631 exp.DataType.Type.UINT: "UINTEGER", 632 exp.DataType.Type.VARBINARY: "BLOB", 633 exp.DataType.Type.ROWVERSION: "BLOB", 634 exp.DataType.Type.VARCHAR: "TEXT", 635 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 636 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 637 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 638 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 639 } 640 641 # 
https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 642 RESERVED_KEYWORDS = { 643 "array", 644 "analyse", 645 "union", 646 "all", 647 "when", 648 "in_p", 649 "default", 650 "create_p", 651 "window", 652 "asymmetric", 653 "to", 654 "else", 655 "localtime", 656 "from", 657 "end_p", 658 "select", 659 "current_date", 660 "foreign", 661 "with", 662 "grant", 663 "session_user", 664 "or", 665 "except", 666 "references", 667 "fetch", 668 "limit", 669 "group_p", 670 "leading", 671 "into", 672 "collate", 673 "offset", 674 "do", 675 "then", 676 "localtimestamp", 677 "check_p", 678 "lateral_p", 679 "current_role", 680 "where", 681 "asc_p", 682 "placing", 683 "desc_p", 684 "user", 685 "unique", 686 "initially", 687 "column", 688 "both", 689 "some", 690 "as", 691 "any", 692 "only", 693 "deferrable", 694 "null_p", 695 "current_time", 696 "true_p", 697 "table", 698 "case", 699 "trailing", 700 "variadic", 701 "for", 702 "on", 703 "distinct", 704 "false_p", 705 "not", 706 "constraint", 707 "current_timestamp", 708 "returning", 709 "primary", 710 "intersect", 711 "having", 712 "analyze", 713 "current_user", 714 "and", 715 "cast", 716 "symmetric", 717 "using", 718 "order", 719 "current_catalog", 720 } 721 722 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 723 724 # DuckDB doesn't generally support CREATE TABLE .. properties 725 # https://duckdb.org/docs/sql/statements/create_table.html 726 PROPERTIES_LOCATION = { 727 prop: exp.Properties.Location.UNSUPPORTED 728 for prop in generator.Generator.PROPERTIES_LOCATION 729 } 730 731 # There are a few exceptions (e.g. temporary tables) which are supported or 732 # can be transpiled to DuckDB, so we explicitly override them accordingly 733 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 734 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 735 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 736 737 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 738 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 739 740 def strtotime_sql(self, expression: exp.StrToTime) -> str: 741 if expression.args.get("safe"): 742 formatted_time = self.format_time(expression) 743 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 744 return str_to_time_sql(self, expression) 745 746 def strtodate_sql(self, expression: exp.StrToDate) -> str: 747 if expression.args.get("safe"): 748 formatted_time = self.format_time(expression) 749 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 750 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 751 752 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 753 arg = expression.this 754 if expression.args.get("safe"): 755 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 756 return self.func("JSON", arg) 757 758 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 759 nano = expression.args.get("nano") 760 if nano is not None: 761 expression.set( 762 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 763 ) 764 765 return rename_func("MAKE_TIME")(self, expression) 766 767 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 768 sec = expression.args["sec"] 769 770 milli = expression.args.get("milli") 771 if milli is not None: 772 
sec += milli.pop() / exp.Literal.number(1000.0) 773 774 nano = expression.args.get("nano") 775 if nano is not None: 776 sec += nano.pop() / exp.Literal.number(1000000000.0) 777 778 if milli or nano: 779 expression.set("sec", sec) 780 781 return rename_func("MAKE_TIMESTAMP")(self, expression) 782 783 def tablesample_sql( 784 self, 785 expression: exp.TableSample, 786 tablesample_keyword: t.Optional[str] = None, 787 ) -> str: 788 if not isinstance(expression.parent, exp.Select): 789 # This sample clause only applies to a single source, not the entire resulting relation 790 tablesample_keyword = "TABLESAMPLE" 791 792 if expression.args.get("size"): 793 method = expression.args.get("method") 794 if method and method.name.upper() != "RESERVOIR": 795 self.unsupported( 796 f"Sampling method {method} is not supported with a discrete sample count, " 797 "defaulting to reservoir sampling" 798 ) 799 expression.set("method", exp.var("RESERVOIR")) 800 801 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 802 803 def interval_sql(self, expression: exp.Interval) -> str: 804 multiplier: t.Optional[int] = None 805 unit = expression.text("unit").lower() 806 807 if unit.startswith("week"): 808 multiplier = 7 809 if unit.startswith("quarter"): 810 multiplier = 90 811 812 if multiplier: 813 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 814 815 return super().interval_sql(expression) 816 817 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 818 if isinstance(expression.parent, exp.UserDefinedFunction): 819 return self.sql(expression, "this") 820 return super().columndef_sql(expression, sep) 821 822 def join_sql(self, expression: exp.Join) -> str: 823 if ( 824 expression.side == "LEFT" 825 and not expression.args.get("on") 826 and isinstance(expression.this, exp.Unnest) 827 ): 828 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 829 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 830 return super().join_sql(expression.on(exp.true())) 831 832 return super().join_sql(expression) 833 834 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 835 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 836 if expression.args.get("is_end_exclusive"): 837 return rename_func("RANGE")(self, expression) 838 839 return self.function_fallback_sql(expression) 840 841 def bracket_sql(self, expression: exp.Bracket) -> str: 842 this = expression.this 843 if isinstance(this, exp.Array): 844 this.replace(exp.paren(this)) 845 846 bracket = super().bracket_sql(expression) 847 848 if not expression.args.get("returns_list_for_maps"): 849 if not this.type: 850 from sqlglot.optimizer.annotate_types import annotate_types 851 852 this = annotate_types(this) 853 854 if this.is_type(exp.DataType.Type.MAP): 855 bracket = f"({bracket})[1]" 856 857 return bracket 858 859 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 860 expression_sql = self.sql(expression, "expression") 861 862 func = expression.this 863 if isinstance(func, exp.PERCENTILES): 864 # Make the order key the first arg and slide the fraction to the right 865 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 866 order_col = expression.find(exp.Ordered) 867 if order_col: 868 func.set("expression", func.this) 869 func.set("this", order_col.this) 870 871 this = self.sql(expression, "this").rstrip(")") 872 873 return f"{this}{expression_sql})" 874 875 def 
length_sql(self, expression: exp.Length) -> str: 876 arg = expression.this 877 878 # Dialects like BQ and Snowflake also accept binary values as args, so 879 # DDB will attempt to infer the type or resort to case/when resolution 880 if not expression.args.get("binary") or arg.is_string: 881 return self.func("LENGTH", arg) 882 883 if not arg.type: 884 from sqlglot.optimizer.annotate_types import annotate_types 885 886 arg = annotate_types(arg) 887 888 if arg.is_type(*exp.DataType.TEXT_TYPES): 889 return self.func("LENGTH", arg) 890 891 # We need these casts to make duckdb's static type checker happy 892 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 893 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 894 895 case = ( 896 exp.case(self.func("TYPEOF", arg)) 897 .when( 898 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 899 ) # anonymous to break length_sql recursion 900 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 901 ) 902 903 return self.sql(case) 904 905 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 906 this = expression.this 907 key = expression.args.get("key") 908 key_sql = key.name if isinstance(key, exp.Expression) else "" 909 value_sql = self.sql(expression, "value") 910 911 kv_sql = f"{key_sql} := {value_sql}" 912 913 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 914 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 915 if isinstance(this, exp.Struct) and not this.expressions: 916 return self.func("STRUCT_PACK", kv_sql) 917 918 return self.func("STRUCT_INSERT", this, kv_sql) 919 920 def unnest_sql(self, expression: exp.Unnest) -> str: 921 explode_array = expression.args.get("explode_array") 922 if explode_array: 923 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 924 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 925 expression.expressions.append( 926 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 927 ) 928 929 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 930 alias = expression.args.get("alias") 931 if alias: 932 expression.set("alias", None) 933 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 934 935 unnest_sql = super().unnest_sql(expression) 936 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 937 return self.sql(select) 938 939 return super().unnest_sql(expression) 940 941 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 942 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 943 # DuckDB should render IGNORE NULLS only for the general-purpose 944 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 
945 return super().ignorenulls_sql(expression) 946 947 return self.sql(expression, "this") 948 949 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 950 this = self.sql(expression, "this") 951 null_text = self.sql(expression, "null") 952 953 if null_text: 954 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 955 956 return self.func("ARRAY_TO_STRING", this, expression.expression) 957 958 @unsupported_args("position", "occurrence") 959 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 960 group = expression.args.get("group") 961 params = expression.args.get("parameters") 962 963 # Do not render group if there is no following argument, 964 # and it's the default value for this dialect 965 if ( 966 not params 967 and group 968 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 969 ): 970 group = None 971 return self.func( 972 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 973 )
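The OBJECT_INSERT comment above can be checked end to end. A minimal sketch (exact output formatting may differ by version):

import sqlglot

# Inserting into an empty Snowflake object becomes STRUCT_PACK, per objectinsert_sql
sql = "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
# expected shape: SELECT STRUCT_PACK(k := 1)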
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
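Generator options such as pretty are forwarded from the top-level API; a small sketch:

import sqlglot

print(sqlglot.transpile("SELECT a FROM t WHERE a > 1", write="duckdb", pretty=True)[0])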
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql