Skip to main content

polyglot_sql/dialects/
fabric.rs

1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// Microsoft Fabric Data Warehouse dialect (based on T-SQL).
///
/// A stateless unit type. Its `DialectImpl` implementation reuses the T-SQL
/// tokenizer configuration, applies Fabric-specific rewrites first, and hands
/// every expression it does not handle itself to `TSQLDialect`.
pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
27    fn dialect_type(&self) -> DialectType {
28        DialectType::Fabric
29    }
30
31    fn tokenizer_config(&self) -> TokenizerConfig {
32        // Inherit from T-SQL
33        let tsql = TSQLDialect;
34        tsql.tokenizer_config()
35    }
36
37    fn generator_config(&self) -> GeneratorConfig {
38        use crate::generator::IdentifierQuoteStyle;
39        // Inherit from T-SQL with Fabric dialect type
40        GeneratorConfig {
41            // Use square brackets like T-SQL
42            identifier_quote: '[',
43            identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44            dialect: Some(DialectType::Fabric),
45            null_ordering_supported: false,
46            ..Default::default()
47        }
48    }
49
50    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
51        // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
52        // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
53        if let Expression::CreateTable(mut ct) = expr {
54            for col in &mut ct.columns {
55                match &col.data_type {
56                    DataType::VarChar { length: None, .. } => {
57                        col.data_type = DataType::VarChar {
58                            length: Some(1),
59                            parenthesized_length: false,
60                        };
61                    }
62                    DataType::Char { length: None } => {
63                        col.data_type = DataType::Char { length: Some(1) };
64                    }
65                    _ => {}
66                }
67                // Also transform column data types through Fabric's type mappings.
68                // Apply TSQL normalisation first (e.g. BPCHAR → Char), then Fabric-specific.
69                let tsql = TSQLDialect;
70                if let Ok(Expression::DataType(tsql_dt)) =
71                    tsql.transform_data_type(col.data_type.clone())
72                {
73                    col.data_type = tsql_dt;
74                }
75                if let Expression::DataType(new_dt) =
76                    self.transform_fabric_data_type(col.data_type.clone())?
77                {
78                    col.data_type = new_dt;
79                }
80            }
81            return Ok(Expression::CreateTable(ct));
82        }
83
84        // Handle DataType::Timestamp specially BEFORE T-SQL transform
85        // because TSQL loses precision info when converting Timestamp to DATETIME2
86        if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
87            let p = FabricDialect::cap_precision(*precision, 6);
88            return Ok(Expression::DataType(DataType::Custom {
89                name: format!("DATETIME2({})", p),
90            }));
91        }
92
93        // Handle DataType::Time specially BEFORE T-SQL transform
94        // to ensure we get default precision of 6
95        if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
96            let p = FabricDialect::cap_precision(*precision, 6);
97            return Ok(Expression::DataType(DataType::Custom {
98                name: format!("TIME({})", p),
99            }));
100        }
101
102        // Handle DataType::Decimal specially BEFORE T-SQL transform
103        // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
104        if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
105            let name = if let (Some(p), Some(s)) = (precision, scale) {
106                format!("DECIMAL({}, {})", p, s)
107            } else if let Some(p) = precision {
108                format!("DECIMAL({})", p)
109            } else {
110                "DECIMAL".to_string()
111            };
112            return Ok(Expression::DataType(DataType::Custom { name }));
113        }
114
115        // Handle AT TIME ZONE with TIMESTAMPTZ cast
116        // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
117        // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
118        // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
119        if let Expression::AtTimeZone(ref at_tz) = expr {
120            // Check if this contains a TIMESTAMPTZ cast
121            if let Expression::Cast(ref inner_cast) = at_tz.this {
122                if let DataType::Timestamp {
123                    timezone: true,
124                    precision,
125                } = &inner_cast.to
126                {
127                    // Get precision, default 6, cap at 6
128                    let capped_precision = FabricDialect::cap_precision(*precision, 6);
129
130                    // Create inner DATETIMEOFFSET cast
131                    let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
132                        this: inner_cast.this.clone(),
133                        to: DataType::Custom {
134                            name: format!("DATETIMEOFFSET({})", capped_precision),
135                        },
136                        trailing_comments: inner_cast.trailing_comments.clone(),
137                        double_colon_syntax: false,
138                        format: None,
139                        default: None,
140                        inferred_type: None,
141                    }));
142
143                    // Create new AT TIME ZONE with DATETIMEOFFSET
144                    let new_at_tz =
145                        Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
146                            this: datetimeoffset_cast,
147                            zone: at_tz.zone.clone(),
148                        }));
149
150                    // Wrap in outer DATETIME2 cast
151                    return Ok(Expression::Cast(Box::new(Cast {
152                        this: new_at_tz,
153                        to: DataType::Custom {
154                            name: format!("DATETIME2({})", capped_precision),
155                        },
156                        trailing_comments: Vec::new(),
157                        double_colon_syntax: false,
158                        format: None,
159                        default: None,
160                        inferred_type: None,
161                    })));
162                }
163            }
164        }
165
166        // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
167        // Reference: Python sqlglot Fabric dialect unixtotime_sql
168        if let Expression::UnixToTime(ref f) = expr {
169            // Build: column * 1e6
170            let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
171                left: (*f.this).clone(),
172                right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
173                left_comments: Vec::new(),
174                operator_comments: Vec::new(),
175                trailing_comments: Vec::new(),
176                inferred_type: None,
177            }));
178
179            // Build: ROUND(column * 1e6, 0)
180            let round_expr = Expression::Function(Box::new(Function::new(
181                "ROUND".to_string(),
182                vec![
183                    column_times_1e6,
184                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
185                ],
186            )));
187
188            // Build: CAST(ROUND(...) AS BIGINT)
189            let cast_to_bigint = Expression::Cast(Box::new(Cast {
190                this: round_expr,
191                to: DataType::BigInt { length: None },
192                trailing_comments: Vec::new(),
193                double_colon_syntax: false,
194                format: None,
195                default: None,
196                inferred_type: None,
197            }));
198
199            // Build: CAST('1970-01-01' AS DATETIME2(6))
200            let epoch_start = Expression::Cast(Box::new(Cast {
201                this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
202                to: DataType::Custom {
203                    name: "DATETIME2(6)".to_string(),
204                },
205                trailing_comments: Vec::new(),
206                double_colon_syntax: false,
207                format: None,
208                default: None,
209                inferred_type: None,
210            }));
211
212            // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
213            let dateadd = Expression::Function(Box::new(Function::new(
214                "DATEADD".to_string(),
215                vec![
216                    Expression::Identifier(Identifier::new("MICROSECONDS")),
217                    cast_to_bigint,
218                    epoch_start,
219                ],
220            )));
221
222            return Ok(dateadd);
223        }
224
225        // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
226        // Reference: Python sqlglot Fabric dialect unixtotime_sql
227        if let Expression::Function(ref f) = expr {
228            if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
229                let timestamp_input = f.args[0].clone();
230
231                // Build: column * 1e6
232                let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
233                    left: timestamp_input,
234                    right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
235                    left_comments: Vec::new(),
236                    operator_comments: Vec::new(),
237                    trailing_comments: Vec::new(),
238                    inferred_type: None,
239                }));
240
241                // Build: ROUND(column * 1e6, 0)
242                let round_expr = Expression::Function(Box::new(Function::new(
243                    "ROUND".to_string(),
244                    vec![
245                        column_times_1e6,
246                        Expression::Literal(Box::new(Literal::Number("0".to_string()))),
247                    ],
248                )));
249
250                // Build: CAST(ROUND(...) AS BIGINT)
251                let cast_to_bigint = Expression::Cast(Box::new(Cast {
252                    this: round_expr,
253                    to: DataType::BigInt { length: None },
254                    trailing_comments: Vec::new(),
255                    double_colon_syntax: false,
256                    format: None,
257                    default: None,
258                    inferred_type: None,
259                }));
260
261                // Build: CAST('1970-01-01' AS DATETIME2(6))
262                let epoch_start = Expression::Cast(Box::new(Cast {
263                    this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
264                    to: DataType::Custom {
265                        name: "DATETIME2(6)".to_string(),
266                    },
267                    trailing_comments: Vec::new(),
268                    double_colon_syntax: false,
269                    format: None,
270                    default: None,
271                    inferred_type: None,
272                }));
273
274                // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
275                let dateadd = Expression::Function(Box::new(Function::new(
276                    "DATEADD".to_string(),
277                    vec![
278                        Expression::Identifier(Identifier::new("MICROSECONDS")),
279                        cast_to_bigint,
280                        epoch_start,
281                    ],
282                )));
283
284                return Ok(dateadd);
285            }
286        }
287
288        // Delegate to T-SQL for other transformations
289        let tsql = TSQLDialect;
290        let transformed = tsql.transform_expr(expr)?;
291
292        // Apply Fabric-specific transformations to the result
293        self.transform_fabric_expr(transformed)
294    }
295}
296
297impl FabricDialect {
298    /// Fabric-specific expression transformations
299    fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
300        match expr {
301            // Handle DataType expressions with Fabric-specific type mappings
302            Expression::DataType(dt) => self.transform_fabric_data_type(dt),
303
304            // Pass through everything else
305            _ => Ok(expr),
306        }
307    }
308
309    /// Transform data types according to Fabric TYPE_MAPPING
310    /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
311    fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
312        let transformed = match dt {
313            // TIMESTAMP -> DATETIME2(6) with precision handling
314            // Note: TSQL already converts this to DATETIME2, but without precision
315            DataType::Timestamp { precision, .. } => {
316                let p = Self::cap_precision(precision, 6);
317                DataType::Custom {
318                    name: format!("DATETIME2({})", p),
319                }
320            }
321
322            // TIME -> TIME(6) default, capped at 6
323            DataType::Time { precision, .. } => {
324                let p = Self::cap_precision(precision, 6);
325                DataType::Custom {
326                    name: format!("TIME({})", p),
327                }
328            }
329
330            // INT -> INT (override TSQL which may output INTEGER)
331            DataType::Int { .. } => DataType::Custom {
332                name: "INT".to_string(),
333            },
334
335            // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
336            DataType::Decimal { precision, scale } => {
337                if let (Some(p), Some(s)) = (&precision, &scale) {
338                    DataType::Custom {
339                        name: format!("DECIMAL({}, {})", p, s),
340                    }
341                } else if let Some(p) = &precision {
342                    DataType::Custom {
343                        name: format!("DECIMAL({})", p),
344                    }
345                } else {
346                    DataType::Custom {
347                        name: "DECIMAL".to_string(),
348                    }
349                }
350            }
351
352            // JSON -> VARCHAR
353            DataType::Json => DataType::Custom {
354                name: "VARCHAR".to_string(),
355            },
356
357            // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
358            DataType::Uuid => DataType::Custom {
359                name: "UNIQUEIDENTIFIER".to_string(),
360            },
361
362            // TinyInt -> SMALLINT
363            DataType::TinyInt { .. } => DataType::Custom {
364                name: "SMALLINT".to_string(),
365            },
366
367            // Handle Custom types for Fabric-specific mappings
368            DataType::Custom { ref name } => {
369                let upper = name.to_uppercase();
370
371                // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
372                let (base_name, precision, scale) =
373                    TSQLDialect::parse_type_precision_and_scale(&upper);
374
375                match base_name.as_str() {
376                    // DATETIME -> DATETIME2(6)
377                    "DATETIME" => DataType::Custom {
378                        name: "DATETIME2(6)".to_string(),
379                    },
380
381                    // SMALLDATETIME -> DATETIME2(6)
382                    "SMALLDATETIME" => DataType::Custom {
383                        name: "DATETIME2(6)".to_string(),
384                    },
385
386                    // DATETIME2 -> DATETIME2(6) default, cap at 6
387                    "DATETIME2" => {
388                        let p = Self::cap_precision(precision, 6);
389                        DataType::Custom {
390                            name: format!("DATETIME2({})", p),
391                        }
392                    }
393
394                    // DATETIMEOFFSET -> cap precision at 6
395                    "DATETIMEOFFSET" => {
396                        let p = Self::cap_precision(precision, 6);
397                        DataType::Custom {
398                            name: format!("DATETIMEOFFSET({})", p),
399                        }
400                    }
401
402                    // TIME -> TIME(6) default, cap at 6
403                    "TIME" => {
404                        let p = Self::cap_precision(precision, 6);
405                        DataType::Custom {
406                            name: format!("TIME({})", p),
407                        }
408                    }
409
410                    // TIMESTAMP -> DATETIME2(6)
411                    "TIMESTAMP" => DataType::Custom {
412                        name: "DATETIME2(6)".to_string(),
413                    },
414
415                    // TIMESTAMPNTZ -> DATETIME2(6) with precision
416                    "TIMESTAMPNTZ" => {
417                        let p = Self::cap_precision(precision, 6);
418                        DataType::Custom {
419                            name: format!("DATETIME2({})", p),
420                        }
421                    }
422
423                    // TIMESTAMPTZ -> DATETIME2(6) with precision
424                    "TIMESTAMPTZ" => {
425                        let p = Self::cap_precision(precision, 6);
426                        DataType::Custom {
427                            name: format!("DATETIME2({})", p),
428                        }
429                    }
430
431                    // IMAGE -> VARBINARY
432                    "IMAGE" => DataType::Custom {
433                        name: "VARBINARY".to_string(),
434                    },
435
436                    // MONEY -> DECIMAL
437                    "MONEY" => DataType::Custom {
438                        name: "DECIMAL".to_string(),
439                    },
440
441                    // SMALLMONEY -> DECIMAL
442                    "SMALLMONEY" => DataType::Custom {
443                        name: "DECIMAL".to_string(),
444                    },
445
446                    // NCHAR -> CHAR (with length preserved)
447                    "NCHAR" => {
448                        if let Some(len) = precision {
449                            DataType::Custom {
450                                name: format!("CHAR({})", len),
451                            }
452                        } else {
453                            DataType::Custom {
454                                name: "CHAR".to_string(),
455                            }
456                        }
457                    }
458
459                    // NVARCHAR -> VARCHAR (with length preserved)
460                    "NVARCHAR" => {
461                        if let Some(len) = precision {
462                            DataType::Custom {
463                                name: format!("VARCHAR({})", len),
464                            }
465                        } else {
466                            DataType::Custom {
467                                name: "VARCHAR".to_string(),
468                            }
469                        }
470                    }
471
472                    // TINYINT -> SMALLINT
473                    "TINYINT" => DataType::Custom {
474                        name: "SMALLINT".to_string(),
475                    },
476
477                    // UTINYINT -> SMALLINT
478                    "UTINYINT" => DataType::Custom {
479                        name: "SMALLINT".to_string(),
480                    },
481
482                    // VARIANT -> SQL_VARIANT
483                    "VARIANT" => DataType::Custom {
484                        name: "SQL_VARIANT".to_string(),
485                    },
486
487                    // XML -> VARCHAR
488                    "XML" => DataType::Custom {
489                        name: "VARCHAR".to_string(),
490                    },
491
492                    // NUMERIC -> DECIMAL (override TSQL's conversion)
493                    // Fabric uses DECIMAL, not NUMERIC
494                    "NUMERIC" => {
495                        if let (Some(p), Some(s)) = (precision, scale) {
496                            DataType::Custom {
497                                name: format!("DECIMAL({}, {})", p, s),
498                            }
499                        } else if let Some(p) = precision {
500                            DataType::Custom {
501                                name: format!("DECIMAL({})", p),
502                            }
503                        } else {
504                            DataType::Custom {
505                                name: "DECIMAL".to_string(),
506                            }
507                        }
508                    }
509
510                    // Pass through other custom types unchanged
511                    _ => dt,
512                }
513            }
514
515            // Keep all other types as transformed by TSQL
516            other => other,
517        };
518
519        Ok(Expression::DataType(transformed))
520    }
521
522    /// Cap precision to max value, defaulting to max if not specified
523    fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
524        match precision {
525            Some(p) if p > max => max,
526            Some(p) => p,
527            None => max, // Default to max if not specified
528        }
529    }
530}