Skip to main content

polyglot_sql/dialects/
fabric.rs

1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// Microsoft Fabric Data Warehouse dialect (based on T-SQL).
///
/// A stateless unit type. Its [`DialectImpl`] implementation reuses the T-SQL tokenizer
/// configuration and T-SQL expression transforms, then layers Fabric-specific data type
/// mappings on top.
pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
    /// Returns [`DialectType::Fabric`], the tag identifying this dialect.
    fn dialect_type(&self) -> DialectType {
        DialectType::Fabric
    }
30
31    fn tokenizer_config(&self) -> TokenizerConfig {
32        // Inherit from T-SQL
33        let tsql = TSQLDialect;
34        tsql.tokenizer_config()
35    }
36
37    fn generator_config(&self) -> GeneratorConfig {
38        use crate::generator::IdentifierQuoteStyle;
39        // Inherit from T-SQL with Fabric dialect type
40        GeneratorConfig {
41            // Use square brackets like T-SQL
42            identifier_quote: '[',
43            identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44            dialect: Some(DialectType::Fabric),
45            null_ordering_supported: false,
46            ..Default::default()
47        }
48    }
49
50    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
51        // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
52        // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
53        if let Expression::CreateTable(mut ct) = expr {
54            for col in &mut ct.columns {
55                match &col.data_type {
56                    DataType::VarChar { length: None, .. } => {
57                        col.data_type = DataType::VarChar {
58                            length: Some(1),
59                            parenthesized_length: false,
60                        };
61                    }
62                    DataType::Char { length: None } => {
63                        col.data_type = DataType::Char { length: Some(1) };
64                    }
65                    _ => {}
66                }
67                // Also transform column data types through Fabric's type mappings.
68                // Apply TSQL normalisation first (e.g. BPCHAR → Char), then Fabric-specific.
69                let tsql = TSQLDialect;
70                if let Ok(Expression::DataType(tsql_dt)) =
71                    tsql.transform_data_type(col.data_type.clone())
72                {
73                    col.data_type = tsql_dt;
74                }
75                if let Expression::DataType(new_dt) =
76                    self.transform_fabric_data_type(col.data_type.clone())?
77                {
78                    col.data_type = new_dt;
79                }
80            }
81            return Ok(Expression::CreateTable(ct));
82        }
83
84        // Handle DataType::Timestamp specially BEFORE T-SQL transform
85        // because TSQL loses precision info when converting Timestamp to DATETIME2
86        if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
87            let p = FabricDialect::cap_precision(*precision, 6);
88            return Ok(Expression::DataType(DataType::Custom {
89                name: format!("DATETIME2({})", p),
90            }));
91        }
92
93        // Handle DataType::Time specially BEFORE T-SQL transform
94        // to ensure we get default precision of 6
95        if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
96            let p = FabricDialect::cap_precision(*precision, 6);
97            return Ok(Expression::DataType(DataType::Custom {
98                name: format!("TIME({})", p),
99            }));
100        }
101
102        // Handle DataType::Decimal specially BEFORE T-SQL transform
103        // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
104        if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
105            let name = if let (Some(p), Some(s)) = (precision, scale) {
106                format!("DECIMAL({}, {})", p, s)
107            } else if let Some(p) = precision {
108                format!("DECIMAL({})", p)
109            } else {
110                "DECIMAL".to_string()
111            };
112            return Ok(Expression::DataType(DataType::Custom { name }));
113        }
114
115        // Handle AT TIME ZONE with TIMESTAMPTZ cast
116        // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
117        // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
118        // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
119        if let Expression::AtTimeZone(ref at_tz) = expr {
120            // Check if this contains a TIMESTAMPTZ cast
121            if let Expression::Cast(ref inner_cast) = at_tz.this {
122                if let DataType::Timestamp {
123                    timezone: true,
124                    precision,
125                } = &inner_cast.to
126                {
127                    // Get precision, default 6, cap at 6
128                    let capped_precision = FabricDialect::cap_precision(*precision, 6);
129
130                    // Create inner DATETIMEOFFSET cast
131                    let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
132                        this: inner_cast.this.clone(),
133                        to: DataType::Custom {
134                            name: format!("DATETIMEOFFSET({})", capped_precision),
135                        },
136                        trailing_comments: inner_cast.trailing_comments.clone(),
137                        double_colon_syntax: false,
138                        format: None,
139                        default: None,
140                        inferred_type: None,
141                    }));
142
143                    // Create new AT TIME ZONE with DATETIMEOFFSET
144                    let new_at_tz =
145                        Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
146                            this: datetimeoffset_cast,
147                            zone: at_tz.zone.clone(),
148                        }));
149
150                    // Wrap in outer DATETIME2 cast
151                    return Ok(Expression::Cast(Box::new(Cast {
152                        this: new_at_tz,
153                        to: DataType::Custom {
154                            name: format!("DATETIME2({})", capped_precision),
155                        },
156                        trailing_comments: Vec::new(),
157                        double_colon_syntax: false,
158                        format: None,
159                        default: None,
160                        inferred_type: None,
161                    })));
162                }
163            }
164        }
165
166        // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
167        // Reference: Python sqlglot Fabric dialect unixtotime_sql
168        if let Expression::UnixToTime(ref f) = expr {
169            // Build: column * 1e6
170            let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
171                left: (*f.this).clone(),
172                right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
173                left_comments: Vec::new(),
174                operator_comments: Vec::new(),
175                trailing_comments: Vec::new(),
176                inferred_type: None,
177            }));
178
179            // Build: ROUND(column * 1e6, 0)
180            let round_expr = Expression::Function(Box::new(Function::new(
181                "ROUND".to_string(),
182                vec![
183                    column_times_1e6,
184                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
185                ],
186            )));
187
188            // Build: CAST(ROUND(...) AS BIGINT)
189            let cast_to_bigint = Expression::Cast(Box::new(Cast {
190                this: round_expr,
191                to: DataType::BigInt { length: None },
192                trailing_comments: Vec::new(),
193                double_colon_syntax: false,
194                format: None,
195                default: None,
196                inferred_type: None,
197            }));
198
199            // Build: CAST('1970-01-01' AS DATETIME2(6))
200            let epoch_start = Expression::Cast(Box::new(Cast {
201                this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
202                to: DataType::Custom {
203                    name: "DATETIME2(6)".to_string(),
204                },
205                trailing_comments: Vec::new(),
206                double_colon_syntax: false,
207                format: None,
208                default: None,
209                inferred_type: None,
210            }));
211
212            // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
213            let dateadd = Expression::Function(Box::new(Function::new(
214                "DATEADD".to_string(),
215                vec![
216                    Expression::Identifier(Identifier::new("MICROSECONDS")),
217                    cast_to_bigint,
218                    epoch_start,
219                ],
220            )));
221
222            return Ok(dateadd);
223        }
224
225        // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
226        // Reference: Python sqlglot Fabric dialect unixtotime_sql
227        if let Expression::Function(ref f) = expr {
228            if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
229                let timestamp_input = f.args[0].clone();
230
231                // Build: column * 1e6
232                let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
233                    left: timestamp_input,
234                    right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
235                    left_comments: Vec::new(),
236                    operator_comments: Vec::new(),
237                    trailing_comments: Vec::new(),
238                    inferred_type: None,
239                }));
240
241                // Build: ROUND(column * 1e6, 0)
242                let round_expr = Expression::Function(Box::new(Function::new(
243                    "ROUND".to_string(),
244                    vec![
245                        column_times_1e6,
246                        Expression::Literal(Box::new(Literal::Number("0".to_string()))),
247                    ],
248                )));
249
250                // Build: CAST(ROUND(...) AS BIGINT)
251                let cast_to_bigint = Expression::Cast(Box::new(Cast {
252                    this: round_expr,
253                    to: DataType::BigInt { length: None },
254                    trailing_comments: Vec::new(),
255                    double_colon_syntax: false,
256                    format: None,
257                    default: None,
258                    inferred_type: None,
259                }));
260
261                // Build: CAST('1970-01-01' AS DATETIME2(6))
262                let epoch_start = Expression::Cast(Box::new(Cast {
263                    this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
264                    to: DataType::Custom {
265                        name: "DATETIME2(6)".to_string(),
266                    },
267                    trailing_comments: Vec::new(),
268                    double_colon_syntax: false,
269                    format: None,
270                    default: None,
271                    inferred_type: None,
272                }));
273
274                // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
275                let dateadd = Expression::Function(Box::new(Function::new(
276                    "DATEADD".to_string(),
277                    vec![
278                        Expression::Identifier(Identifier::new("MICROSECONDS")),
279                        cast_to_bigint,
280                        epoch_start,
281                    ],
282                )));
283
284                return Ok(dateadd);
285            }
286        }
287
288        // Delegate to T-SQL for other transformations
289        let tsql = TSQLDialect;
290        let transformed = tsql.transform_expr(expr)?;
291
292        // Apply Fabric-specific transformations to the result
293        self.transform_fabric_expr(transformed)
294    }
295}
296
297impl FabricDialect {
298    /// Fabric-specific expression transformations
299    fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
300        match expr {
301            // Handle DataType expressions with Fabric-specific type mappings
302            Expression::DataType(dt) => self.transform_fabric_data_type(dt),
303
304            // Pass through everything else
305            _ => Ok(expr),
306        }
307    }
308
309    /// Transform data types according to Fabric TYPE_MAPPING
310    /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
311    fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
312        let transformed = match dt {
313            // TIMESTAMP -> DATETIME2(6) with precision handling
314            // Note: TSQL already converts this to DATETIME2, but without precision
315            DataType::Timestamp { precision, .. } => {
316                let p = Self::cap_precision(precision, 6);
317                DataType::Custom {
318                    name: format!("DATETIME2({})", p),
319                }
320            }
321
322            // TIME -> TIME(6) default, capped at 6
323            DataType::Time { precision, .. } => {
324                let p = Self::cap_precision(precision, 6);
325                DataType::Custom {
326                    name: format!("TIME({})", p),
327                }
328            }
329
330            // INT -> INT (override TSQL which may output INTEGER)
331            DataType::Int { .. } => DataType::Custom {
332                name: "INT".to_string(),
333            },
334
335            // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
336            DataType::Decimal { precision, scale } => {
337                if let (Some(p), Some(s)) = (&precision, &scale) {
338                    DataType::Custom {
339                        name: format!("DECIMAL({}, {})", p, s),
340                    }
341                } else if let Some(p) = &precision {
342                    DataType::Custom {
343                        name: format!("DECIMAL({})", p),
344                    }
345                } else {
346                    DataType::Custom {
347                        name: "DECIMAL".to_string(),
348                    }
349                }
350            }
351
352            // JSON -> VARCHAR
353            DataType::Json => DataType::Custom {
354                name: "VARCHAR".to_string(),
355            },
356
357            // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
358            DataType::Uuid => DataType::Custom {
359                name: "UNIQUEIDENTIFIER".to_string(),
360            },
361
362            // TinyInt -> SMALLINT
363            DataType::TinyInt { .. } => DataType::Custom {
364                name: "SMALLINT".to_string(),
365            },
366
367            // Handle Custom types for Fabric-specific mappings
368            DataType::Custom { ref name } => {
369                let upper = name.to_uppercase();
370
371                // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
372                let (base_name, precision, scale) =
373                    TSQLDialect::parse_type_precision_and_scale(&upper);
374                let has_max_length = upper.contains("(MAX)");
375
376                match base_name.as_str() {
377                    // DATETIME -> DATETIME2(6)
378                    "DATETIME" => DataType::Custom {
379                        name: "DATETIME2(6)".to_string(),
380                    },
381
382                    // SMALLDATETIME -> DATETIME2(6)
383                    "SMALLDATETIME" => DataType::Custom {
384                        name: "DATETIME2(6)".to_string(),
385                    },
386
387                    // DATETIME2 -> DATETIME2(6) default, cap at 6
388                    "DATETIME2" => {
389                        let p = Self::cap_precision(precision, 6);
390                        DataType::Custom {
391                            name: format!("DATETIME2({})", p),
392                        }
393                    }
394
395                    // DATETIMEOFFSET -> cap precision at 6
396                    "DATETIMEOFFSET" => {
397                        let p = Self::cap_precision(precision, 6);
398                        DataType::Custom {
399                            name: format!("DATETIMEOFFSET({})", p),
400                        }
401                    }
402
403                    // TIME -> TIME(6) default, cap at 6
404                    "TIME" => {
405                        let p = Self::cap_precision(precision, 6);
406                        DataType::Custom {
407                            name: format!("TIME({})", p),
408                        }
409                    }
410
411                    // TIMESTAMP -> DATETIME2(6)
412                    "TIMESTAMP" => DataType::Custom {
413                        name: "DATETIME2(6)".to_string(),
414                    },
415
416                    // TIMESTAMPNTZ -> DATETIME2(6) with precision
417                    "TIMESTAMPNTZ" => {
418                        let p = Self::cap_precision(precision, 6);
419                        DataType::Custom {
420                            name: format!("DATETIME2({})", p),
421                        }
422                    }
423
424                    // TIMESTAMPTZ -> DATETIME2(6) with precision
425                    "TIMESTAMPTZ" => {
426                        let p = Self::cap_precision(precision, 6);
427                        DataType::Custom {
428                            name: format!("DATETIME2({})", p),
429                        }
430                    }
431
432                    // IMAGE -> VARBINARY
433                    "IMAGE" => DataType::Custom {
434                        name: "VARBINARY".to_string(),
435                    },
436
437                    // MONEY -> DECIMAL
438                    "MONEY" => DataType::Custom {
439                        name: "DECIMAL".to_string(),
440                    },
441
442                    // SMALLMONEY -> DECIMAL
443                    "SMALLMONEY" => DataType::Custom {
444                        name: "DECIMAL".to_string(),
445                    },
446
447                    // NCHAR -> CHAR (with length preserved)
448                    "NCHAR" => {
449                        if has_max_length {
450                            DataType::Custom {
451                                name: "CHAR(MAX)".to_string(),
452                            }
453                        } else if let Some(len) = precision {
454                            DataType::Custom {
455                                name: format!("CHAR({})", len),
456                            }
457                        } else {
458                            DataType::Custom {
459                                name: "CHAR".to_string(),
460                            }
461                        }
462                    }
463
464                    // NVARCHAR -> VARCHAR (with length preserved)
465                    "NVARCHAR" => {
466                        if has_max_length {
467                            DataType::Custom {
468                                name: "VARCHAR(MAX)".to_string(),
469                            }
470                        } else if let Some(len) = precision {
471                            DataType::Custom {
472                                name: format!("VARCHAR({})", len),
473                            }
474                        } else {
475                            DataType::Custom {
476                                name: "VARCHAR".to_string(),
477                            }
478                        }
479                    }
480
481                    // TINYINT -> SMALLINT
482                    "TINYINT" => DataType::Custom {
483                        name: "SMALLINT".to_string(),
484                    },
485
486                    // UTINYINT -> SMALLINT
487                    "UTINYINT" => DataType::Custom {
488                        name: "SMALLINT".to_string(),
489                    },
490
491                    // VARIANT -> SQL_VARIANT
492                    "VARIANT" => DataType::Custom {
493                        name: "SQL_VARIANT".to_string(),
494                    },
495
496                    // XML -> VARCHAR
497                    "XML" => DataType::Custom {
498                        name: "VARCHAR".to_string(),
499                    },
500
501                    // NUMERIC -> DECIMAL (override TSQL's conversion)
502                    // Fabric uses DECIMAL, not NUMERIC
503                    "NUMERIC" => {
504                        if let (Some(p), Some(s)) = (precision, scale) {
505                            DataType::Custom {
506                                name: format!("DECIMAL({}, {})", p, s),
507                            }
508                        } else if let Some(p) = precision {
509                            DataType::Custom {
510                                name: format!("DECIMAL({})", p),
511                            }
512                        } else {
513                            DataType::Custom {
514                                name: "DECIMAL".to_string(),
515                            }
516                        }
517                    }
518
519                    // Pass through other custom types unchanged
520                    _ => dt,
521                }
522            }
523
524            // Keep all other types as transformed by TSQL
525            other => other,
526        };
527
528        Ok(Expression::DataType(transformed))
529    }
530
531    /// Cap precision to max value, defaulting to max if not specified
532    fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
533        match precision {
534            Some(p) if p > max => max,
535            Some(p) => p,
536            None => max, // Default to max if not specified
537        }
538    }
539}