Skip to main content

polyglot_sql/dialects/
fabric.rs

1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// SQL dialect for Microsoft Fabric Data Warehouse.
///
/// A field-less marker type: all behaviour lives in its `DialectImpl`
/// implementation and in the private helper methods defined on it.
pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
27    fn dialect_type(&self) -> DialectType {
28        DialectType::Fabric
29    }
30
31    fn tokenizer_config(&self) -> TokenizerConfig {
32        // Inherit from T-SQL
33        let tsql = TSQLDialect;
34        tsql.tokenizer_config()
35    }
36
37    fn generator_config(&self) -> GeneratorConfig {
38        use crate::generator::IdentifierQuoteStyle;
39        // Inherit from T-SQL with Fabric dialect type
40        GeneratorConfig {
41            // Use square brackets like T-SQL
42            identifier_quote: '[',
43            identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44            dialect: Some(DialectType::Fabric),
45            ..Default::default()
46        }
47    }
48
49    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50        // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
51        // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
52        if let Expression::CreateTable(mut ct) = expr {
53            for col in &mut ct.columns {
54                match &col.data_type {
55                    DataType::VarChar { length: None, .. } => {
56                        col.data_type = DataType::VarChar {
57                            length: Some(1),
58                            parenthesized_length: false,
59                        };
60                    }
61                    DataType::Char { length: None } => {
62                        col.data_type = DataType::Char { length: Some(1) };
63                    }
64                    _ => {}
65                }
66                // Also transform column data types through Fabric's type mappings
67                if let Expression::DataType(new_dt) =
68                    self.transform_fabric_data_type(col.data_type.clone())?
69                {
70                    col.data_type = new_dt;
71                }
72            }
73            return Ok(Expression::CreateTable(ct));
74        }
75
76        // Handle DataType::Timestamp specially BEFORE T-SQL transform
77        // because TSQL loses precision info when converting Timestamp to DATETIME2
78        if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
79            let p = FabricDialect::cap_precision(*precision, 6);
80            return Ok(Expression::DataType(DataType::Custom {
81                name: format!("DATETIME2({})", p),
82            }));
83        }
84
85        // Handle DataType::Time specially BEFORE T-SQL transform
86        // to ensure we get default precision of 6
87        if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
88            let p = FabricDialect::cap_precision(*precision, 6);
89            return Ok(Expression::DataType(DataType::Custom {
90                name: format!("TIME({})", p),
91            }));
92        }
93
94        // Handle DataType::Decimal specially BEFORE T-SQL transform
95        // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
96        if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
97            let name = if let (Some(p), Some(s)) = (precision, scale) {
98                format!("DECIMAL({}, {})", p, s)
99            } else if let Some(p) = precision {
100                format!("DECIMAL({})", p)
101            } else {
102                "DECIMAL".to_string()
103            };
104            return Ok(Expression::DataType(DataType::Custom { name }));
105        }
106
107        // Handle AT TIME ZONE with TIMESTAMPTZ cast
108        // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
109        // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
110        // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
111        if let Expression::AtTimeZone(ref at_tz) = expr {
112            // Check if this contains a TIMESTAMPTZ cast
113            if let Expression::Cast(ref inner_cast) = at_tz.this {
114                if let DataType::Timestamp {
115                    timezone: true,
116                    precision,
117                } = &inner_cast.to
118                {
119                    // Get precision, default 6, cap at 6
120                    let capped_precision = FabricDialect::cap_precision(*precision, 6);
121
122                    // Create inner DATETIMEOFFSET cast
123                    let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
124                        this: inner_cast.this.clone(),
125                        to: DataType::Custom {
126                            name: format!("DATETIMEOFFSET({})", capped_precision),
127                        },
128                        trailing_comments: inner_cast.trailing_comments.clone(),
129                        double_colon_syntax: false,
130                        format: None,
131                        default: None,
132                    }));
133
134                    // Create new AT TIME ZONE with DATETIMEOFFSET
135                    let new_at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
136                        this: datetimeoffset_cast,
137                        zone: at_tz.zone.clone(),
138                    }));
139
140                    // Wrap in outer DATETIME2 cast
141                    return Ok(Expression::Cast(Box::new(Cast {
142                        this: new_at_tz,
143                        to: DataType::Custom {
144                            name: format!("DATETIME2({})", capped_precision),
145                        },
146                        trailing_comments: Vec::new(),
147                        double_colon_syntax: false,
148                        format: None,
149                        default: None,
150                    })));
151                }
152            }
153        }
154
155        // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
156        // Reference: Python sqlglot Fabric dialect unixtotime_sql
157        if let Expression::UnixToTime(ref f) = expr {
158            // Build: column * 1e6
159            let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
160                left: (*f.this).clone(),
161                right: Expression::Literal(Literal::Number("1e6".to_string())),
162                left_comments: Vec::new(),
163                operator_comments: Vec::new(),
164                trailing_comments: Vec::new(),
165            }));
166
167            // Build: ROUND(column * 1e6, 0)
168            let round_expr = Expression::Function(Box::new(Function::new(
169                "ROUND".to_string(),
170                vec![
171                    column_times_1e6,
172                    Expression::Literal(Literal::Number("0".to_string())),
173                ],
174            )));
175
176            // Build: CAST(ROUND(...) AS BIGINT)
177            let cast_to_bigint = Expression::Cast(Box::new(Cast {
178                this: round_expr,
179                to: DataType::BigInt { length: None },
180                trailing_comments: Vec::new(),
181                double_colon_syntax: false,
182                format: None,
183                default: None,
184            }));
185
186            // Build: CAST('1970-01-01' AS DATETIME2(6))
187            let epoch_start = Expression::Cast(Box::new(Cast {
188                this: Expression::Literal(Literal::String("1970-01-01".to_string())),
189                to: DataType::Custom {
190                    name: "DATETIME2(6)".to_string(),
191                },
192                trailing_comments: Vec::new(),
193                double_colon_syntax: false,
194                format: None,
195                default: None,
196            }));
197
198            // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
199            let dateadd = Expression::Function(Box::new(Function::new(
200                "DATEADD".to_string(),
201                vec![
202                    Expression::Identifier(Identifier::new("MICROSECONDS")),
203                    cast_to_bigint,
204                    epoch_start,
205                ],
206            )));
207
208            return Ok(dateadd);
209        }
210
211        // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
212        // Reference: Python sqlglot Fabric dialect unixtotime_sql
213        if let Expression::Function(ref f) = expr {
214            if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
215                let timestamp_input = f.args[0].clone();
216
217                // Build: column * 1e6
218                let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
219                    left: timestamp_input,
220                    right: Expression::Literal(Literal::Number("1e6".to_string())),
221                    left_comments: Vec::new(),
222                    operator_comments: Vec::new(),
223                    trailing_comments: Vec::new(),
224                }));
225
226                // Build: ROUND(column * 1e6, 0)
227                let round_expr = Expression::Function(Box::new(Function::new(
228                    "ROUND".to_string(),
229                    vec![
230                        column_times_1e6,
231                        Expression::Literal(Literal::Number("0".to_string())),
232                    ],
233                )));
234
235                // Build: CAST(ROUND(...) AS BIGINT)
236                let cast_to_bigint = Expression::Cast(Box::new(Cast {
237                    this: round_expr,
238                    to: DataType::BigInt { length: None },
239                    trailing_comments: Vec::new(),
240                    double_colon_syntax: false,
241                    format: None,
242                    default: None,
243                }));
244
245                // Build: CAST('1970-01-01' AS DATETIME2(6))
246                let epoch_start = Expression::Cast(Box::new(Cast {
247                    this: Expression::Literal(Literal::String("1970-01-01".to_string())),
248                    to: DataType::Custom {
249                        name: "DATETIME2(6)".to_string(),
250                    },
251                    trailing_comments: Vec::new(),
252                    double_colon_syntax: false,
253                    format: None,
254                    default: None,
255                }));
256
257                // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
258                let dateadd = Expression::Function(Box::new(Function::new(
259                    "DATEADD".to_string(),
260                    vec![
261                        Expression::Identifier(Identifier::new("MICROSECONDS")),
262                        cast_to_bigint,
263                        epoch_start,
264                    ],
265                )));
266
267                return Ok(dateadd);
268            }
269        }
270
271        // Delegate to T-SQL for other transformations
272        let tsql = TSQLDialect;
273        let transformed = tsql.transform_expr(expr)?;
274
275        // Apply Fabric-specific transformations to the result
276        self.transform_fabric_expr(transformed)
277    }
278}
279
280impl FabricDialect {
281    /// Fabric-specific expression transformations
282    fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
283        match expr {
284            // Handle DataType expressions with Fabric-specific type mappings
285            Expression::DataType(dt) => self.transform_fabric_data_type(dt),
286
287            // Pass through everything else
288            _ => Ok(expr),
289        }
290    }
291
292    /// Transform data types according to Fabric TYPE_MAPPING
293    /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
294    fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
295        let transformed = match dt {
296            // TIMESTAMP -> DATETIME2(6) with precision handling
297            // Note: TSQL already converts this to DATETIME2, but without precision
298            DataType::Timestamp { precision, .. } => {
299                let p = Self::cap_precision(precision, 6);
300                DataType::Custom {
301                    name: format!("DATETIME2({})", p),
302                }
303            }
304
305            // TIME -> TIME(6) default, capped at 6
306            DataType::Time { precision, .. } => {
307                let p = Self::cap_precision(precision, 6);
308                DataType::Custom {
309                    name: format!("TIME({})", p),
310                }
311            }
312
313            // INT -> INT (override TSQL which may output INTEGER)
314            DataType::Int { .. } => DataType::Custom {
315                name: "INT".to_string(),
316            },
317
318            // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
319            DataType::Decimal { precision, scale } => {
320                if let (Some(p), Some(s)) = (&precision, &scale) {
321                    DataType::Custom {
322                        name: format!("DECIMAL({}, {})", p, s),
323                    }
324                } else if let Some(p) = &precision {
325                    DataType::Custom {
326                        name: format!("DECIMAL({})", p),
327                    }
328                } else {
329                    DataType::Custom {
330                        name: "DECIMAL".to_string(),
331                    }
332                }
333            }
334
335            // JSON -> VARCHAR
336            DataType::Json => DataType::Custom {
337                name: "VARCHAR".to_string(),
338            },
339
340            // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
341            DataType::Uuid => DataType::Custom {
342                name: "UNIQUEIDENTIFIER".to_string(),
343            },
344
345            // TinyInt -> SMALLINT
346            DataType::TinyInt { .. } => DataType::Custom {
347                name: "SMALLINT".to_string(),
348            },
349
350            // Handle Custom types for Fabric-specific mappings
351            DataType::Custom { ref name } => {
352                let upper = name.to_uppercase();
353
354                // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
355                let (base_name, precision, scale) = Self::parse_type_precision_and_scale(&upper);
356
357                match base_name.as_str() {
358                    // DATETIME -> DATETIME2(6)
359                    "DATETIME" => DataType::Custom {
360                        name: "DATETIME2(6)".to_string(),
361                    },
362
363                    // SMALLDATETIME -> DATETIME2(6)
364                    "SMALLDATETIME" => DataType::Custom {
365                        name: "DATETIME2(6)".to_string(),
366                    },
367
368                    // DATETIME2 -> DATETIME2(6) default, cap at 6
369                    "DATETIME2" => {
370                        let p = Self::cap_precision(precision, 6);
371                        DataType::Custom {
372                            name: format!("DATETIME2({})", p),
373                        }
374                    }
375
376                    // DATETIMEOFFSET -> cap precision at 6
377                    "DATETIMEOFFSET" => {
378                        let p = Self::cap_precision(precision, 6);
379                        DataType::Custom {
380                            name: format!("DATETIMEOFFSET({})", p),
381                        }
382                    }
383
384                    // TIME -> TIME(6) default, cap at 6
385                    "TIME" => {
386                        let p = Self::cap_precision(precision, 6);
387                        DataType::Custom {
388                            name: format!("TIME({})", p),
389                        }
390                    }
391
392                    // TIMESTAMP -> DATETIME2(6)
393                    "TIMESTAMP" => DataType::Custom {
394                        name: "DATETIME2(6)".to_string(),
395                    },
396
397                    // TIMESTAMPNTZ -> DATETIME2(6) with precision
398                    "TIMESTAMPNTZ" => {
399                        let p = Self::cap_precision(precision, 6);
400                        DataType::Custom {
401                            name: format!("DATETIME2({})", p),
402                        }
403                    }
404
405                    // TIMESTAMPTZ -> DATETIME2(6) with precision
406                    "TIMESTAMPTZ" => {
407                        let p = Self::cap_precision(precision, 6);
408                        DataType::Custom {
409                            name: format!("DATETIME2({})", p),
410                        }
411                    }
412
413                    // IMAGE -> VARBINARY
414                    "IMAGE" => DataType::Custom {
415                        name: "VARBINARY".to_string(),
416                    },
417
418                    // MONEY -> DECIMAL
419                    "MONEY" => DataType::Custom {
420                        name: "DECIMAL".to_string(),
421                    },
422
423                    // SMALLMONEY -> DECIMAL
424                    "SMALLMONEY" => DataType::Custom {
425                        name: "DECIMAL".to_string(),
426                    },
427
428                    // NCHAR -> CHAR (with length preserved)
429                    "NCHAR" => {
430                        if let Some(len) = precision {
431                            DataType::Custom {
432                                name: format!("CHAR({})", len),
433                            }
434                        } else {
435                            DataType::Custom {
436                                name: "CHAR".to_string(),
437                            }
438                        }
439                    }
440
441                    // NVARCHAR -> VARCHAR (with length preserved)
442                    "NVARCHAR" => {
443                        if let Some(len) = precision {
444                            DataType::Custom {
445                                name: format!("VARCHAR({})", len),
446                            }
447                        } else {
448                            DataType::Custom {
449                                name: "VARCHAR".to_string(),
450                            }
451                        }
452                    }
453
454                    // TINYINT -> SMALLINT
455                    "TINYINT" => DataType::Custom {
456                        name: "SMALLINT".to_string(),
457                    },
458
459                    // UTINYINT -> SMALLINT
460                    "UTINYINT" => DataType::Custom {
461                        name: "SMALLINT".to_string(),
462                    },
463
464                    // VARIANT -> SQL_VARIANT
465                    "VARIANT" => DataType::Custom {
466                        name: "SQL_VARIANT".to_string(),
467                    },
468
469                    // XML -> VARCHAR
470                    "XML" => DataType::Custom {
471                        name: "VARCHAR".to_string(),
472                    },
473
474                    // NUMERIC -> DECIMAL (override TSQL's conversion)
475                    // Fabric uses DECIMAL, not NUMERIC
476                    "NUMERIC" => {
477                        if let (Some(p), Some(s)) = (precision, scale) {
478                            DataType::Custom {
479                                name: format!("DECIMAL({}, {})", p, s),
480                            }
481                        } else if let Some(p) = precision {
482                            DataType::Custom {
483                                name: format!("DECIMAL({})", p),
484                            }
485                        } else {
486                            DataType::Custom {
487                                name: "DECIMAL".to_string(),
488                            }
489                        }
490                    }
491
492                    // Pass through other custom types unchanged
493                    _ => dt,
494                }
495            }
496
497            // Keep all other types as transformed by TSQL
498            other => other,
499        };
500
501        Ok(Expression::DataType(transformed))
502    }
503
504    /// Cap precision to max value, defaulting to max if not specified
505    fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
506        match precision {
507            Some(p) if p > max => max,
508            Some(p) => p,
509            None => max, // Default to max if not specified
510        }
511    }
512
513    /// Parse type name and optional precision/scale from strings like "DATETIME2(7)" or "NUMERIC(10, 2)"
514    fn parse_type_precision_and_scale(name: &str) -> (String, Option<u32>, Option<u32>) {
515        if let Some(paren_pos) = name.find('(') {
516            let base = name[..paren_pos].to_string();
517            let rest = &name[paren_pos + 1..];
518            if let Some(close_pos) = rest.find(')') {
519                let args = &rest[..close_pos];
520                let parts: Vec<&str> = args.split(',').map(|s| s.trim()).collect();
521
522                let precision = parts.first().and_then(|s| s.parse::<u32>().ok());
523                let scale = parts.get(1).and_then(|s| s.parse::<u32>().ok());
524
525                return (base, precision, scale);
526            }
527            (base, None, None)
528        } else {
529            (name.to_string(), None, None)
530        }
531    }
532}