Skip to main content

polyglot_sql/dialects/
fabric.rs

1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20#[cfg(feature = "generate")]
21use crate::generator::GeneratorConfig;
22use crate::tokens::TokenizerConfig;
23
/// Microsoft Fabric Data Warehouse dialect (based on T-SQL).
///
/// This is a field-less marker type: all behaviour lives in its
/// [`DialectImpl`] implementation, which reuses [`TSQLDialect`] for
/// tokenizing and, when transpiling, delegates to the T-SQL transform and
/// applies Fabric-specific rewrites around it.
pub struct FabricDialect;
26
27impl DialectImpl for FabricDialect {
    /// Identifies this implementation as [`DialectType::Fabric`].
    fn dialect_type(&self) -> DialectType {
        DialectType::Fabric
    }
31
32    fn tokenizer_config(&self) -> TokenizerConfig {
33        // Inherit from T-SQL
34        let tsql = TSQLDialect;
35        tsql.tokenizer_config()
36    }
37
38    #[cfg(feature = "generate")]
39
40    fn generator_config(&self) -> GeneratorConfig {
41        use crate::generator::IdentifierQuoteStyle;
42        // Inherit from T-SQL with Fabric dialect type
43        GeneratorConfig {
44            // Use square brackets like T-SQL
45            identifier_quote: '[',
46            identifier_quote_style: IdentifierQuoteStyle::BRACKET,
47            dialect: Some(DialectType::Fabric),
48            null_ordering_supported: false,
49            aggregate_filter_supported: false,
50            cte_recursive_keyword_required: false,
51            ..Default::default()
52        }
53    }
54
55    #[cfg(feature = "transpile")]
56
57    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
58        // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
59        // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
60        if let Expression::CreateTable(mut ct) = expr {
61            for col in &mut ct.columns {
62                match &col.data_type {
63                    DataType::VarChar { length: None, .. } => {
64                        col.data_type = DataType::VarChar {
65                            length: Some(1),
66                            parenthesized_length: false,
67                        };
68                    }
69                    DataType::Char { length: None } => {
70                        col.data_type = DataType::Char { length: Some(1) };
71                    }
72                    _ => {}
73                }
74                // Also transform column data types through Fabric's type mappings.
75                // Apply TSQL normalisation first (e.g. BPCHAR → Char), then Fabric-specific.
76                let tsql = TSQLDialect;
77                if let Ok(Expression::DataType(tsql_dt)) =
78                    tsql.transform_data_type(col.data_type.clone())
79                {
80                    col.data_type = tsql_dt;
81                }
82                if let Expression::DataType(new_dt) =
83                    self.transform_fabric_data_type(col.data_type.clone())?
84                {
85                    col.data_type = new_dt;
86                }
87            }
88            return Ok(Expression::CreateTable(ct));
89        }
90
91        // Handle DataType::Timestamp specially BEFORE T-SQL transform
92        // because TSQL loses precision info when converting Timestamp to DATETIME2
93        if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
94            let p = FabricDialect::cap_precision(*precision, 6);
95            return Ok(Expression::DataType(DataType::Custom {
96                name: format!("DATETIME2({})", p),
97            }));
98        }
99
100        // Handle DataType::Time specially BEFORE T-SQL transform
101        // to ensure we get default precision of 6
102        if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
103            let p = FabricDialect::cap_precision(*precision, 6);
104            return Ok(Expression::DataType(DataType::Custom {
105                name: format!("TIME({})", p),
106            }));
107        }
108
109        // Handle DataType::Decimal specially BEFORE T-SQL transform
110        // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
111        if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
112            let name = Self::decimal_type_name(*precision, *scale);
113            return Ok(Expression::DataType(DataType::Custom { name }));
114        }
115
116        // Handle AT TIME ZONE with TIMESTAMPTZ cast
117        // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
118        // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
119        // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
120        if let Expression::AtTimeZone(ref at_tz) = expr {
121            // Check if this contains a TIMESTAMPTZ cast
122            if let Expression::Cast(ref inner_cast) = at_tz.this {
123                if let DataType::Timestamp {
124                    timezone: true,
125                    precision,
126                } = &inner_cast.to
127                {
128                    // Get precision, default 6, cap at 6
129                    let capped_precision = FabricDialect::cap_precision(*precision, 6);
130
131                    // Create inner DATETIMEOFFSET cast
132                    let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
133                        this: inner_cast.this.clone(),
134                        to: DataType::Custom {
135                            name: format!("DATETIMEOFFSET({})", capped_precision),
136                        },
137                        trailing_comments: inner_cast.trailing_comments.clone(),
138                        double_colon_syntax: false,
139                        format: None,
140                        default: None,
141                        inferred_type: None,
142                    }));
143
144                    // Create new AT TIME ZONE with DATETIMEOFFSET
145                    let new_at_tz =
146                        Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
147                            this: datetimeoffset_cast,
148                            zone: at_tz.zone.clone(),
149                        }));
150
151                    // Wrap in outer DATETIME2 cast
152                    return Ok(Expression::Cast(Box::new(Cast {
153                        this: new_at_tz,
154                        to: DataType::Custom {
155                            name: format!("DATETIME2({})", capped_precision),
156                        },
157                        trailing_comments: Vec::new(),
158                        double_colon_syntax: false,
159                        format: None,
160                        default: None,
161                        inferred_type: None,
162                    })));
163                }
164            }
165        }
166
167        // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
168        // Reference: Python sqlglot Fabric dialect unixtotime_sql
169        if let Expression::UnixToTime(ref f) = expr {
170            // Build: column * 1e6
171            let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
172                left: (*f.this).clone(),
173                right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
174                left_comments: Vec::new(),
175                operator_comments: Vec::new(),
176                trailing_comments: Vec::new(),
177                inferred_type: None,
178            }));
179
180            // Build: ROUND(column * 1e6, 0)
181            let round_expr = Expression::Function(Box::new(Function::new(
182                "ROUND".to_string(),
183                vec![
184                    column_times_1e6,
185                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
186                ],
187            )));
188
189            // Build: CAST(ROUND(...) AS BIGINT)
190            let cast_to_bigint = Expression::Cast(Box::new(Cast {
191                this: round_expr,
192                to: DataType::BigInt { length: None },
193                trailing_comments: Vec::new(),
194                double_colon_syntax: false,
195                format: None,
196                default: None,
197                inferred_type: None,
198            }));
199
200            // Build: CAST('1970-01-01' AS DATETIME2(6))
201            let epoch_start = Expression::Cast(Box::new(Cast {
202                this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
203                to: DataType::Custom {
204                    name: "DATETIME2(6)".to_string(),
205                },
206                trailing_comments: Vec::new(),
207                double_colon_syntax: false,
208                format: None,
209                default: None,
210                inferred_type: None,
211            }));
212
213            // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
214            let dateadd = Expression::Function(Box::new(Function::new(
215                "DATEADD".to_string(),
216                vec![
217                    Expression::Identifier(Identifier::new("MICROSECONDS")),
218                    cast_to_bigint,
219                    epoch_start,
220                ],
221            )));
222
223            return Ok(dateadd);
224        }
225
226        // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
227        // Reference: Python sqlglot Fabric dialect unixtotime_sql
228        if let Expression::Function(ref f) = expr {
229            if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
230                let timestamp_input = f.args[0].clone();
231
232                // Build: column * 1e6
233                let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
234                    left: timestamp_input,
235                    right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
236                    left_comments: Vec::new(),
237                    operator_comments: Vec::new(),
238                    trailing_comments: Vec::new(),
239                    inferred_type: None,
240                }));
241
242                // Build: ROUND(column * 1e6, 0)
243                let round_expr = Expression::Function(Box::new(Function::new(
244                    "ROUND".to_string(),
245                    vec![
246                        column_times_1e6,
247                        Expression::Literal(Box::new(Literal::Number("0".to_string()))),
248                    ],
249                )));
250
251                // Build: CAST(ROUND(...) AS BIGINT)
252                let cast_to_bigint = Expression::Cast(Box::new(Cast {
253                    this: round_expr,
254                    to: DataType::BigInt { length: None },
255                    trailing_comments: Vec::new(),
256                    double_colon_syntax: false,
257                    format: None,
258                    default: None,
259                    inferred_type: None,
260                }));
261
262                // Build: CAST('1970-01-01' AS DATETIME2(6))
263                let epoch_start = Expression::Cast(Box::new(Cast {
264                    this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
265                    to: DataType::Custom {
266                        name: "DATETIME2(6)".to_string(),
267                    },
268                    trailing_comments: Vec::new(),
269                    double_colon_syntax: false,
270                    format: None,
271                    default: None,
272                    inferred_type: None,
273                }));
274
275                // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
276                let dateadd = Expression::Function(Box::new(Function::new(
277                    "DATEADD".to_string(),
278                    vec![
279                        Expression::Identifier(Identifier::new("MICROSECONDS")),
280                        cast_to_bigint,
281                        epoch_start,
282                    ],
283                )));
284
285                return Ok(dateadd);
286            }
287        }
288
289        // Delegate to T-SQL for other transformations
290        let tsql = TSQLDialect;
291        let transformed = tsql.transform_expr(expr)?;
292
293        // Apply Fabric-specific transformations to the result
294        self.transform_fabric_expr(transformed)
295    }
296}
297
298#[cfg(feature = "transpile")]
299impl FabricDialect {
300    /// Fabric-specific expression transformations
301    fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
302        match expr {
303            // Handle DataType expressions with Fabric-specific type mappings
304            Expression::DataType(dt) => self.transform_fabric_data_type(dt),
305
306            // Pass through everything else
307            _ => Ok(expr),
308        }
309    }
310
311    /// Transform data types according to Fabric TYPE_MAPPING
312    /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
313    fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
314        let transformed = match dt {
315            // TIMESTAMP -> DATETIME2(6) with precision handling
316            // Note: TSQL already converts this to DATETIME2, but without precision
317            DataType::Timestamp { precision, .. } => {
318                let p = Self::cap_precision(precision, 6);
319                DataType::Custom {
320                    name: format!("DATETIME2({})", p),
321                }
322            }
323
324            // TIME -> TIME(6) default, capped at 6
325            DataType::Time { precision, .. } => {
326                let p = Self::cap_precision(precision, 6);
327                DataType::Custom {
328                    name: format!("TIME({})", p),
329                }
330            }
331
332            // INT -> INT (override TSQL which may output INTEGER)
333            DataType::Int { .. } => DataType::Custom {
334                name: "INT".to_string(),
335            },
336
337            // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
338            DataType::Decimal { precision, scale } => DataType::Custom {
339                name: Self::decimal_type_name(precision, scale),
340            },
341
342            // JSON -> VARCHAR
343            DataType::Json => DataType::Custom {
344                name: "VARCHAR".to_string(),
345            },
346
347            // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
348            DataType::Uuid => DataType::Custom {
349                name: "UNIQUEIDENTIFIER".to_string(),
350            },
351
352            // TinyInt -> SMALLINT
353            DataType::TinyInt { .. } => DataType::Custom {
354                name: "SMALLINT".to_string(),
355            },
356
357            // Handle Custom types for Fabric-specific mappings
358            DataType::Custom { ref name } => {
359                let upper = name.to_uppercase();
360
361                // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
362                let (base_name, precision, scale) =
363                    TSQLDialect::parse_type_precision_and_scale(&upper);
364                let has_max_length = upper.contains("(MAX)");
365
366                match base_name.as_str() {
367                    // DATETIME -> DATETIME2(6)
368                    "DATETIME" => DataType::Custom {
369                        name: "DATETIME2(6)".to_string(),
370                    },
371
372                    // SMALLDATETIME -> DATETIME2(6)
373                    "SMALLDATETIME" => DataType::Custom {
374                        name: "DATETIME2(6)".to_string(),
375                    },
376
377                    // DATETIME2 -> DATETIME2(6) default, cap at 6
378                    "DATETIME2" => {
379                        let p = Self::cap_precision(precision, 6);
380                        DataType::Custom {
381                            name: format!("DATETIME2({})", p),
382                        }
383                    }
384
385                    // DATETIMEOFFSET -> cap precision at 6
386                    "DATETIMEOFFSET" => {
387                        let p = Self::cap_precision(precision, 6);
388                        DataType::Custom {
389                            name: format!("DATETIMEOFFSET({})", p),
390                        }
391                    }
392
393                    // TIME -> TIME(6) default, cap at 6
394                    "TIME" => {
395                        let p = Self::cap_precision(precision, 6);
396                        DataType::Custom {
397                            name: format!("TIME({})", p),
398                        }
399                    }
400
401                    // TIMESTAMP -> DATETIME2(6)
402                    "TIMESTAMP" => DataType::Custom {
403                        name: "DATETIME2(6)".to_string(),
404                    },
405
406                    // TIMESTAMPNTZ -> DATETIME2(6) with precision
407                    "TIMESTAMPNTZ" => {
408                        let p = Self::cap_precision(precision, 6);
409                        DataType::Custom {
410                            name: format!("DATETIME2({})", p),
411                        }
412                    }
413
414                    // TIMESTAMPTZ -> DATETIME2(6) with precision
415                    "TIMESTAMPTZ" => {
416                        let p = Self::cap_precision(precision, 6);
417                        DataType::Custom {
418                            name: format!("DATETIME2({})", p),
419                        }
420                    }
421
422                    // IMAGE -> VARBINARY
423                    "IMAGE" => DataType::Custom {
424                        name: "VARBINARY".to_string(),
425                    },
426
427                    // MONEY -> DECIMAL
428                    "MONEY" => DataType::Custom {
429                        name: "DECIMAL".to_string(),
430                    },
431
432                    // SMALLMONEY -> DECIMAL
433                    "SMALLMONEY" => DataType::Custom {
434                        name: "DECIMAL".to_string(),
435                    },
436
437                    // NCHAR -> CHAR (with length preserved)
438                    "NCHAR" => {
439                        if has_max_length {
440                            DataType::Custom {
441                                name: "CHAR(MAX)".to_string(),
442                            }
443                        } else if let Some(len) = precision {
444                            DataType::Custom {
445                                name: format!("CHAR({})", len),
446                            }
447                        } else {
448                            DataType::Custom {
449                                name: "CHAR".to_string(),
450                            }
451                        }
452                    }
453
454                    // NVARCHAR -> VARCHAR (with length preserved)
455                    "NVARCHAR" => {
456                        if has_max_length {
457                            DataType::Custom {
458                                name: "VARCHAR(MAX)".to_string(),
459                            }
460                        } else if let Some(len) = precision {
461                            DataType::Custom {
462                                name: format!("VARCHAR({})", len),
463                            }
464                        } else {
465                            DataType::Custom {
466                                name: "VARCHAR".to_string(),
467                            }
468                        }
469                    }
470
471                    // TINYINT -> SMALLINT
472                    "TINYINT" => DataType::Custom {
473                        name: "SMALLINT".to_string(),
474                    },
475
476                    // UTINYINT -> SMALLINT
477                    "UTINYINT" => DataType::Custom {
478                        name: "SMALLINT".to_string(),
479                    },
480
481                    // VARIANT -> SQL_VARIANT
482                    "VARIANT" => DataType::Custom {
483                        name: "SQL_VARIANT".to_string(),
484                    },
485
486                    // XML -> VARCHAR
487                    "XML" => DataType::Custom {
488                        name: "VARCHAR".to_string(),
489                    },
490
491                    // NUMERIC -> DECIMAL (override TSQL's conversion)
492                    // Fabric uses DECIMAL, not NUMERIC
493                    "DECIMAL" | "NUMERIC" => DataType::Custom {
494                        name: Self::decimal_type_name(precision, scale),
495                    },
496
497                    // Pass through other custom types unchanged
498                    _ => dt,
499                }
500            }
501
502            // Keep all other types as transformed by TSQL
503            other => other,
504        };
505
506        Ok(Expression::DataType(transformed))
507    }
508
509    /// Cap precision to max value, defaulting to max if not specified
510    fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
511        match precision {
512            Some(p) if p > max => max,
513            Some(p) => p,
514            None => max, // Default to max if not specified
515        }
516    }
517
518    fn decimal_type_name(precision: Option<u32>, scale: Option<u32>) -> String {
519        match (precision, scale) {
520            (Some(p), Some(s)) => format!("DECIMAL({}, {})", p, s),
521            (Some(p), None) => format!("DECIMAL({})", p),
522            (None, _) => "DECIMAL".to_string(),
523        }
524    }
525}