sqlglot.dialects.redshift
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, transforms 6from sqlglot.dialects.dialect import ( 7 NormalizationStrategy, 8 concat_to_dpipe_sql, 9 concat_ws_to_dpipe_sql, 10 date_delta_sql, 11 generatedasidentitycolumnconstraint_sql, 12 json_extract_segments, 13 no_tablesample_sql, 14 rename_func, 15) 16from sqlglot.dialects.postgres import Postgres 17from sqlglot.helper import seq_get 18from sqlglot.tokens import TokenType 19 20if t.TYPE_CHECKING: 21 from sqlglot._typing import E 22 23 24def _build_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 25 def _builder(args: t.List) -> E: 26 expr = expr_type(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)) 27 if expr_type is exp.TsOrDsAdd: 28 expr.set("return_type", exp.DataType.build("TIMESTAMP")) 29 30 return expr 31 32 return _builder 33 34 35class Redshift(Postgres): 36 # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html 37 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 38 39 SUPPORTS_USER_DEFINED_TYPES = False 40 INDEX_OFFSET = 0 41 42 TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'" 43 TIME_MAPPING = { 44 **Postgres.TIME_MAPPING, 45 "MON": "%b", 46 "HH": "%H", 47 } 48 49 class Parser(Postgres.Parser): 50 FUNCTIONS = { 51 **Postgres.Parser.FUNCTIONS, 52 "ADD_MONTHS": lambda args: exp.TsOrDsAdd( 53 this=seq_get(args, 0), 54 expression=seq_get(args, 1), 55 unit=exp.var("month"), 56 return_type=exp.DataType.build("TIMESTAMP"), 57 ), 58 "DATEADD": _build_date_delta(exp.TsOrDsAdd), 59 "DATE_ADD": _build_date_delta(exp.TsOrDsAdd), 60 "DATEDIFF": _build_date_delta(exp.TsOrDsDiff), 61 "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff), 62 "GETDATE": exp.CurrentTimestamp.from_arg_list, 63 "LISTAGG": exp.GroupConcat.from_arg_list, 64 "STRTOL": exp.FromBase.from_arg_list, 65 } 66 67 NO_PAREN_FUNCTION_PARSERS = { 68 **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, 69 "APPROXIMATE": lambda self: self._parse_approximate_count(), 70 "SYSDATE": 
lambda self: self.expression(exp.CurrentTimestamp, transaction=True), 71 } 72 73 SUPPORTS_IMPLICIT_UNNEST = True 74 75 def _parse_table( 76 self, 77 schema: bool = False, 78 joins: bool = False, 79 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 80 parse_bracket: bool = False, 81 is_db_reference: bool = False, 82 ) -> t.Optional[exp.Expression]: 83 # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr` 84 unpivot = self._match(TokenType.UNPIVOT) 85 table = super()._parse_table( 86 schema=schema, 87 joins=joins, 88 alias_tokens=alias_tokens, 89 parse_bracket=parse_bracket, 90 is_db_reference=is_db_reference, 91 ) 92 93 return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table 94 95 def _parse_convert( 96 self, strict: bool, safe: t.Optional[bool] = None 97 ) -> t.Optional[exp.Expression]: 98 to = self._parse_types() 99 self._match(TokenType.COMMA) 100 this = self._parse_bitwise() 101 return self.expression(exp.TryCast, this=this, to=to, safe=safe) 102 103 def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]: 104 index = self._index - 1 105 func = self._parse_function() 106 107 if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct): 108 return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)) 109 self._retreat(index) 110 return None 111 112 class Tokenizer(Postgres.Tokenizer): 113 BIT_STRINGS = [] 114 HEX_STRINGS = [] 115 STRING_ESCAPES = ["\\", "'"] 116 117 KEYWORDS = { 118 **Postgres.Tokenizer.KEYWORDS, 119 "HLLSKETCH": TokenType.HLLSKETCH, 120 "SUPER": TokenType.SUPER, 121 "TOP": TokenType.TOP, 122 "UNLOAD": TokenType.COMMAND, 123 "VARBYTE": TokenType.VARBINARY, 124 } 125 KEYWORDS.pop("VALUES") 126 127 # Redshift allows # to appear as a table identifier prefix 128 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 129 SINGLE_TOKENS.pop("#") 130 131 class Generator(Postgres.Generator): 132 LOCKING_READS_SUPPORTED = False 133 QUERY_HINTS = 
False 134 VALUES_AS_TABLE = False 135 TZ_TO_WITH_TIME_ZONE = True 136 NVL2_SUPPORTED = True 137 LAST_DAY_SUPPORTS_DATE_PART = False 138 CAN_IMPLEMENT_ARRAY_ANY = False 139 MULTI_ARG_DISTINCT = True 140 141 TYPE_MAPPING = { 142 **Postgres.Generator.TYPE_MAPPING, 143 exp.DataType.Type.BINARY: "VARBYTE", 144 exp.DataType.Type.INT: "INTEGER", 145 exp.DataType.Type.TIMETZ: "TIME", 146 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 147 exp.DataType.Type.VARBINARY: "VARBYTE", 148 } 149 150 TRANSFORMS = { 151 **Postgres.Generator.TRANSFORMS, 152 exp.Concat: concat_to_dpipe_sql, 153 exp.ConcatWs: concat_ws_to_dpipe_sql, 154 exp.ApproxDistinct: lambda self, 155 e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", 156 exp.CurrentTimestamp: lambda self, e: ( 157 "SYSDATE" if e.args.get("transaction") else "GETDATE()" 158 ), 159 exp.DateAdd: date_delta_sql("DATEADD"), 160 exp.DateDiff: date_delta_sql("DATEDIFF"), 161 exp.DistKeyProperty: lambda self, e: self.func("DISTKEY", e.this), 162 exp.DistStyleProperty: lambda self, e: self.naked_property(e), 163 exp.FromBase: rename_func("STRTOL"), 164 exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, 165 exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 166 exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 167 exp.GroupConcat: rename_func("LISTAGG"), 168 exp.ParseJSON: rename_func("JSON_PARSE"), 169 exp.Select: transforms.preprocess( 170 [ 171 transforms.eliminate_distinct_on, 172 transforms.eliminate_semi_and_anti_joins, 173 transforms.unqualify_unnest, 174 ] 175 ), 176 exp.SortKeyProperty: lambda self, 177 e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", 178 exp.StartsWith: lambda self, 179 e: f"{self.sql(e.this)} LIKE {self.sql(e.expression)} || '%'", 180 exp.TableSample: no_tablesample_sql, 181 exp.TsOrDsAdd: date_delta_sql("DATEADD"), 182 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 183 exp.UnixToTime: lambda self, 184 e: 
f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')", 185 } 186 187 # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots 188 TRANSFORMS.pop(exp.Pivot) 189 190 # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres) 191 TRANSFORMS.pop(exp.Pow) 192 193 # Redshift supports ANY_VALUE(..) 194 TRANSFORMS.pop(exp.AnyValue) 195 196 # Redshift supports LAST_DAY(..) 197 TRANSFORMS.pop(exp.LastDay) 198 199 RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"} 200 201 def unnest_sql(self, expression: exp.Unnest) -> str: 202 args = expression.expressions 203 num_args = len(args) 204 205 if num_args > 1: 206 self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}") 207 return "" 208 209 arg = self.sql(seq_get(args, 0)) 210 alias = self.expressions(expression.args.get("alias"), key="columns", flat=True) 211 return f"{arg} AS {alias}" if alias else arg 212 213 def with_properties(self, properties: exp.Properties) -> str: 214 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 215 return self.properties(properties, prefix=" ", suffix="") 216 217 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 218 if expression.is_type(exp.DataType.Type.JSON): 219 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 220 return self.sql(expression, "this") 221 222 return super().cast_sql(expression, safe_prefix=safe_prefix) 223 224 def datatype_sql(self, expression: exp.DataType) -> str: 225 """ 226 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 227 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 228 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 229 `TEXT` to `VARCHAR`. 
230 """ 231 if expression.is_type("text"): 232 expression.set("this", exp.DataType.Type.VARCHAR) 233 precision = expression.args.get("expressions") 234 235 if not precision: 236 expression.append("expressions", exp.var("MAX")) 237 238 return super().datatype_sql(expression)
36class Redshift(Postgres): 37 # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html 38 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 39 40 SUPPORTS_USER_DEFINED_TYPES = False 41 INDEX_OFFSET = 0 42 43 TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'" 44 TIME_MAPPING = { 45 **Postgres.TIME_MAPPING, 46 "MON": "%b", 47 "HH": "%H", 48 } 49 50 class Parser(Postgres.Parser): 51 FUNCTIONS = { 52 **Postgres.Parser.FUNCTIONS, 53 "ADD_MONTHS": lambda args: exp.TsOrDsAdd( 54 this=seq_get(args, 0), 55 expression=seq_get(args, 1), 56 unit=exp.var("month"), 57 return_type=exp.DataType.build("TIMESTAMP"), 58 ), 59 "DATEADD": _build_date_delta(exp.TsOrDsAdd), 60 "DATE_ADD": _build_date_delta(exp.TsOrDsAdd), 61 "DATEDIFF": _build_date_delta(exp.TsOrDsDiff), 62 "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff), 63 "GETDATE": exp.CurrentTimestamp.from_arg_list, 64 "LISTAGG": exp.GroupConcat.from_arg_list, 65 "STRTOL": exp.FromBase.from_arg_list, 66 } 67 68 NO_PAREN_FUNCTION_PARSERS = { 69 **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, 70 "APPROXIMATE": lambda self: self._parse_approximate_count(), 71 "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True), 72 } 73 74 SUPPORTS_IMPLICIT_UNNEST = True 75 76 def _parse_table( 77 self, 78 schema: bool = False, 79 joins: bool = False, 80 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 81 parse_bracket: bool = False, 82 is_db_reference: bool = False, 83 ) -> t.Optional[exp.Expression]: 84 # Redshift supports UNPIVOTing SUPER objects, e.g. 
`UNPIVOT foo.obj[0] AS val AT attr` 85 unpivot = self._match(TokenType.UNPIVOT) 86 table = super()._parse_table( 87 schema=schema, 88 joins=joins, 89 alias_tokens=alias_tokens, 90 parse_bracket=parse_bracket, 91 is_db_reference=is_db_reference, 92 ) 93 94 return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table 95 96 def _parse_convert( 97 self, strict: bool, safe: t.Optional[bool] = None 98 ) -> t.Optional[exp.Expression]: 99 to = self._parse_types() 100 self._match(TokenType.COMMA) 101 this = self._parse_bitwise() 102 return self.expression(exp.TryCast, this=this, to=to, safe=safe) 103 104 def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]: 105 index = self._index - 1 106 func = self._parse_function() 107 108 if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct): 109 return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)) 110 self._retreat(index) 111 return None 112 113 class Tokenizer(Postgres.Tokenizer): 114 BIT_STRINGS = [] 115 HEX_STRINGS = [] 116 STRING_ESCAPES = ["\\", "'"] 117 118 KEYWORDS = { 119 **Postgres.Tokenizer.KEYWORDS, 120 "HLLSKETCH": TokenType.HLLSKETCH, 121 "SUPER": TokenType.SUPER, 122 "TOP": TokenType.TOP, 123 "UNLOAD": TokenType.COMMAND, 124 "VARBYTE": TokenType.VARBINARY, 125 } 126 KEYWORDS.pop("VALUES") 127 128 # Redshift allows # to appear as a table identifier prefix 129 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 130 SINGLE_TOKENS.pop("#") 131 132 class Generator(Postgres.Generator): 133 LOCKING_READS_SUPPORTED = False 134 QUERY_HINTS = False 135 VALUES_AS_TABLE = False 136 TZ_TO_WITH_TIME_ZONE = True 137 NVL2_SUPPORTED = True 138 LAST_DAY_SUPPORTS_DATE_PART = False 139 CAN_IMPLEMENT_ARRAY_ANY = False 140 MULTI_ARG_DISTINCT = True 141 142 TYPE_MAPPING = { 143 **Postgres.Generator.TYPE_MAPPING, 144 exp.DataType.Type.BINARY: "VARBYTE", 145 exp.DataType.Type.INT: "INTEGER", 146 exp.DataType.Type.TIMETZ: "TIME", 147 
exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 148 exp.DataType.Type.VARBINARY: "VARBYTE", 149 } 150 151 TRANSFORMS = { 152 **Postgres.Generator.TRANSFORMS, 153 exp.Concat: concat_to_dpipe_sql, 154 exp.ConcatWs: concat_ws_to_dpipe_sql, 155 exp.ApproxDistinct: lambda self, 156 e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", 157 exp.CurrentTimestamp: lambda self, e: ( 158 "SYSDATE" if e.args.get("transaction") else "GETDATE()" 159 ), 160 exp.DateAdd: date_delta_sql("DATEADD"), 161 exp.DateDiff: date_delta_sql("DATEDIFF"), 162 exp.DistKeyProperty: lambda self, e: self.func("DISTKEY", e.this), 163 exp.DistStyleProperty: lambda self, e: self.naked_property(e), 164 exp.FromBase: rename_func("STRTOL"), 165 exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, 166 exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 167 exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 168 exp.GroupConcat: rename_func("LISTAGG"), 169 exp.ParseJSON: rename_func("JSON_PARSE"), 170 exp.Select: transforms.preprocess( 171 [ 172 transforms.eliminate_distinct_on, 173 transforms.eliminate_semi_and_anti_joins, 174 transforms.unqualify_unnest, 175 ] 176 ), 177 exp.SortKeyProperty: lambda self, 178 e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", 179 exp.StartsWith: lambda self, 180 e: f"{self.sql(e.this)} LIKE {self.sql(e.expression)} || '%'", 181 exp.TableSample: no_tablesample_sql, 182 exp.TsOrDsAdd: date_delta_sql("DATEADD"), 183 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 184 exp.UnixToTime: lambda self, 185 e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')", 186 } 187 188 # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots 189 TRANSFORMS.pop(exp.Pivot) 190 191 # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres) 192 TRANSFORMS.pop(exp.Pow) 193 194 # Redshift supports ANY_VALUE(..) 
195 TRANSFORMS.pop(exp.AnyValue) 196 197 # Redshift supports LAST_DAY(..) 198 TRANSFORMS.pop(exp.LastDay) 199 200 RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"} 201 202 def unnest_sql(self, expression: exp.Unnest) -> str: 203 args = expression.expressions 204 num_args = len(args) 205 206 if num_args > 1: 207 self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}") 208 return "" 209 210 arg = self.sql(seq_get(args, 0)) 211 alias = self.expressions(expression.args.get("alias"), key="columns", flat=True) 212 return f"{arg} AS {alias}" if alias else arg 213 214 def with_properties(self, properties: exp.Properties) -> str: 215 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 216 return self.properties(properties, prefix=" ", suffix="") 217 218 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 219 if expression.is_type(exp.DataType.Type.JSON): 220 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 221 return self.sql(expression, "this") 222 223 return super().cast_sql(expression, safe_prefix=safe_prefix) 224 225 def datatype_sql(self, expression: exp.DataType) -> str: 226 """ 227 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 228 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 229 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 230 `TEXT` to `VARCHAR`. 231 """ 232 if expression.is_type("text"): 233 expression.set("this", exp.DataType.Type.VARCHAR) 234 precision = expression.args.get("expressions") 235 236 if not precision: 237 expression.append("expressions", exp.var("MAX")) 238 239 return super().datatype_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
Associates this dialect's time formats with their equivalent Python strftime formats.
Mapping of an escaped sequence (e.g. `\n`) to its unescaped version (e.g. a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- SAFE_DIVISION
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
50 class Parser(Postgres.Parser): 51 FUNCTIONS = { 52 **Postgres.Parser.FUNCTIONS, 53 "ADD_MONTHS": lambda args: exp.TsOrDsAdd( 54 this=seq_get(args, 0), 55 expression=seq_get(args, 1), 56 unit=exp.var("month"), 57 return_type=exp.DataType.build("TIMESTAMP"), 58 ), 59 "DATEADD": _build_date_delta(exp.TsOrDsAdd), 60 "DATE_ADD": _build_date_delta(exp.TsOrDsAdd), 61 "DATEDIFF": _build_date_delta(exp.TsOrDsDiff), 62 "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff), 63 "GETDATE": exp.CurrentTimestamp.from_arg_list, 64 "LISTAGG": exp.GroupConcat.from_arg_list, 65 "STRTOL": exp.FromBase.from_arg_list, 66 } 67 68 NO_PAREN_FUNCTION_PARSERS = { 69 **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, 70 "APPROXIMATE": lambda self: self._parse_approximate_count(), 71 "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True), 72 } 73 74 SUPPORTS_IMPLICIT_UNNEST = True 75 76 def _parse_table( 77 self, 78 schema: bool = False, 79 joins: bool = False, 80 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 81 parse_bracket: bool = False, 82 is_db_reference: bool = False, 83 ) -> t.Optional[exp.Expression]: 84 # Redshift supports UNPIVOTing SUPER objects, e.g. 
`UNPIVOT foo.obj[0] AS val AT attr` 85 unpivot = self._match(TokenType.UNPIVOT) 86 table = super()._parse_table( 87 schema=schema, 88 joins=joins, 89 alias_tokens=alias_tokens, 90 parse_bracket=parse_bracket, 91 is_db_reference=is_db_reference, 92 ) 93 94 return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table 95 96 def _parse_convert( 97 self, strict: bool, safe: t.Optional[bool] = None 98 ) -> t.Optional[exp.Expression]: 99 to = self._parse_types() 100 self._match(TokenType.COMMA) 101 this = self._parse_bitwise() 102 return self.expression(exp.TryCast, this=this, to=to, safe=safe) 103 104 def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]: 105 index = self._index - 1 106 func = self._parse_function() 107 108 if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct): 109 return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)) 110 self._retreat(index) 111 return None
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
113 class Tokenizer(Postgres.Tokenizer): 114 BIT_STRINGS = [] 115 HEX_STRINGS = [] 116 STRING_ESCAPES = ["\\", "'"] 117 118 KEYWORDS = { 119 **Postgres.Tokenizer.KEYWORDS, 120 "HLLSKETCH": TokenType.HLLSKETCH, 121 "SUPER": TokenType.SUPER, 122 "TOP": TokenType.TOP, 123 "UNLOAD": TokenType.COMMAND, 124 "VARBYTE": TokenType.VARBINARY, 125 } 126 KEYWORDS.pop("VALUES") 127 128 # Redshift allows # to appear as a table identifier prefix 129 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 130 SINGLE_TOKENS.pop("#")
Inherited Members
132 class Generator(Postgres.Generator): 133 LOCKING_READS_SUPPORTED = False 134 QUERY_HINTS = False 135 VALUES_AS_TABLE = False 136 TZ_TO_WITH_TIME_ZONE = True 137 NVL2_SUPPORTED = True 138 LAST_DAY_SUPPORTS_DATE_PART = False 139 CAN_IMPLEMENT_ARRAY_ANY = False 140 MULTI_ARG_DISTINCT = True 141 142 TYPE_MAPPING = { 143 **Postgres.Generator.TYPE_MAPPING, 144 exp.DataType.Type.BINARY: "VARBYTE", 145 exp.DataType.Type.INT: "INTEGER", 146 exp.DataType.Type.TIMETZ: "TIME", 147 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 148 exp.DataType.Type.VARBINARY: "VARBYTE", 149 } 150 151 TRANSFORMS = { 152 **Postgres.Generator.TRANSFORMS, 153 exp.Concat: concat_to_dpipe_sql, 154 exp.ConcatWs: concat_ws_to_dpipe_sql, 155 exp.ApproxDistinct: lambda self, 156 e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})", 157 exp.CurrentTimestamp: lambda self, e: ( 158 "SYSDATE" if e.args.get("transaction") else "GETDATE()" 159 ), 160 exp.DateAdd: date_delta_sql("DATEADD"), 161 exp.DateDiff: date_delta_sql("DATEDIFF"), 162 exp.DistKeyProperty: lambda self, e: self.func("DISTKEY", e.this), 163 exp.DistStyleProperty: lambda self, e: self.naked_property(e), 164 exp.FromBase: rename_func("STRTOL"), 165 exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, 166 exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 167 exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 168 exp.GroupConcat: rename_func("LISTAGG"), 169 exp.ParseJSON: rename_func("JSON_PARSE"), 170 exp.Select: transforms.preprocess( 171 [ 172 transforms.eliminate_distinct_on, 173 transforms.eliminate_semi_and_anti_joins, 174 transforms.unqualify_unnest, 175 ] 176 ), 177 exp.SortKeyProperty: lambda self, 178 e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", 179 exp.StartsWith: lambda self, 180 e: f"{self.sql(e.this)} LIKE {self.sql(e.expression)} || '%'", 181 exp.TableSample: no_tablesample_sql, 182 exp.TsOrDsAdd: 
date_delta_sql("DATEADD"), 183 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 184 exp.UnixToTime: lambda self, 185 e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')", 186 } 187 188 # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots 189 TRANSFORMS.pop(exp.Pivot) 190 191 # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres) 192 TRANSFORMS.pop(exp.Pow) 193 194 # Redshift supports ANY_VALUE(..) 195 TRANSFORMS.pop(exp.AnyValue) 196 197 # Redshift supports LAST_DAY(..) 198 TRANSFORMS.pop(exp.LastDay) 199 200 RESERVED_KEYWORDS = {*Postgres.Generator.RESERVED_KEYWORDS, "snapshot", "type"} 201 202 def unnest_sql(self, expression: exp.Unnest) -> str: 203 args = expression.expressions 204 num_args = len(args) 205 206 if num_args > 1: 207 self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}") 208 return "" 209 210 arg = self.sql(seq_get(args, 0)) 211 alias = self.expressions(expression.args.get("alias"), key="columns", flat=True) 212 return f"{arg} AS {alias}" if alias else arg 213 214 def with_properties(self, properties: exp.Properties) -> str: 215 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 216 return self.properties(properties, prefix=" ", suffix="") 217 218 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 219 if expression.is_type(exp.DataType.Type.JSON): 220 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 221 return self.sql(expression, "this") 222 223 return super().cast_sql(expression, safe_prefix=safe_prefix) 224 225 def datatype_sql(self, expression: exp.DataType) -> str: 226 """ 227 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 228 VARCHAR of max length which is `VARCHAR(max)` in Redshift. 
Therefore if we get a `TEXT` data type 229 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 230 `TEXT` to `VARCHAR`. 231 """ 232 if expression.is_type("text"): 233 expression.set("this", exp.DataType.Type.VARCHAR) 234 precision = expression.args.get("expressions") 235 236 if not precision: 237 expression.append("expressions", exp.var("MAX")) 238 239 return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a `WHERE` clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
202 def unnest_sql(self, expression: exp.Unnest) -> str: 203 args = expression.expressions 204 num_args = len(args) 205 206 if num_args > 1: 207 self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}") 208 return "" 209 210 arg = self.sql(seq_get(args, 0)) 211 alias = self.expressions(expression.args.get("alias"), key="columns", flat=True) 212 return f"{arg} AS {alias}" if alias else arg
214 def with_properties(self, properties: exp.Properties) -> str: 215 """Redshift doesn't have `WITH` as part of their with_properties so we remove it""" 216 return self.properties(properties, prefix=" ", suffix="")
Redshift doesn't have WITH as part of their with_properties so we remove it
218 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 219 if expression.is_type(exp.DataType.Type.JSON): 220 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 221 return self.sql(expression, "this") 222 223 return super().cast_sql(expression, safe_prefix=safe_prefix)
225 def datatype_sql(self, expression: exp.DataType) -> str: 226 """ 227 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 228 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 229 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 230 `TEXT` to `VARCHAR`. 231 """ 232 if expression.is_type("text"): 233 expression.set("this", exp.DataType.Type.VARCHAR) 234 precision = expression.args.get("expressions") 235 236 if not precision: 237 expression.append("expressions", exp.var("MAX")) 238 239 return super().datatype_sql(expression)
Redshift converts the TEXT data type to VARCHAR(255) by default when people more generally mean
VARCHAR of max length which is VARCHAR(max) in Redshift. Therefore if we get a TEXT data type
without precision we convert it to VARCHAR(max) and if it does have precision then we just convert
TEXT to VARCHAR.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_TO_NUMBER
- OUTER_UNION_MODIFIERS
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- sqlglot.dialects.postgres.Postgres.Generator
- SINGLE_STRING_INTERVAL
- RENAME_TABLE_WITH_DB
- JOIN_HINTS
- TABLE_HINTS
- PARAMETER_TOKEN
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_SEED_KEYWORD
- SUPPORTS_SELECT_INTO
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- SUPPORTED_JSON_PATH_PARTS
- PROPERTIES_LOCATION
- bracket_sql
- matchagainst_sql