Skip to main content

polyglot_sql/dialects/
fabric.rs

1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
23/// Microsoft Fabric Data Warehouse dialect (based on T-SQL)
24pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
27    fn dialect_type(&self) -> DialectType {
28        DialectType::Fabric
29    }
30
31    fn tokenizer_config(&self) -> TokenizerConfig {
32        // Inherit from T-SQL
33        let tsql = TSQLDialect;
34        tsql.tokenizer_config()
35    }
36
37    fn generator_config(&self) -> GeneratorConfig {
38        use crate::generator::IdentifierQuoteStyle;
39        // Inherit from T-SQL with Fabric dialect type
40        GeneratorConfig {
41            // Use square brackets like T-SQL
42            identifier_quote: '[',
43            identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44            dialect: Some(DialectType::Fabric),
45            ..Default::default()
46        }
47    }
48
49    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50        // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
51        // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
52        if let Expression::CreateTable(mut ct) = expr {
53            for col in &mut ct.columns {
54                match &col.data_type {
55                    DataType::VarChar { length: None, .. } => {
56                        col.data_type = DataType::VarChar {
57                            length: Some(1),
58                            parenthesized_length: false,
59                        };
60                    }
61                    DataType::Char { length: None } => {
62                        col.data_type = DataType::Char { length: Some(1) };
63                    }
64                    _ => {}
65                }
66                // Also transform column data types through Fabric's type mappings
67                if let Expression::DataType(new_dt) =
68                    self.transform_fabric_data_type(col.data_type.clone())?
69                {
70                    col.data_type = new_dt;
71                }
72            }
73            return Ok(Expression::CreateTable(ct));
74        }
75
76        // Handle DataType::Timestamp specially BEFORE T-SQL transform
77        // because TSQL loses precision info when converting Timestamp to DATETIME2
78        if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
79            let p = FabricDialect::cap_precision(*precision, 6);
80            return Ok(Expression::DataType(DataType::Custom {
81                name: format!("DATETIME2({})", p),
82            }));
83        }
84
85        // Handle DataType::Time specially BEFORE T-SQL transform
86        // to ensure we get default precision of 6
87        if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
88            let p = FabricDialect::cap_precision(*precision, 6);
89            return Ok(Expression::DataType(DataType::Custom {
90                name: format!("TIME({})", p),
91            }));
92        }
93
94        // Handle DataType::Decimal specially BEFORE T-SQL transform
95        // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
96        if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
97            let name = if let (Some(p), Some(s)) = (precision, scale) {
98                format!("DECIMAL({}, {})", p, s)
99            } else if let Some(p) = precision {
100                format!("DECIMAL({})", p)
101            } else {
102                "DECIMAL".to_string()
103            };
104            return Ok(Expression::DataType(DataType::Custom { name }));
105        }
106
107        // Handle AT TIME ZONE with TIMESTAMPTZ cast
108        // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
109        // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
110        // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
111        if let Expression::AtTimeZone(ref at_tz) = expr {
112            // Check if this contains a TIMESTAMPTZ cast
113            if let Expression::Cast(ref inner_cast) = at_tz.this {
114                if let DataType::Timestamp {
115                    timezone: true,
116                    precision,
117                } = &inner_cast.to
118                {
119                    // Get precision, default 6, cap at 6
120                    let capped_precision = FabricDialect::cap_precision(*precision, 6);
121
122                    // Create inner DATETIMEOFFSET cast
123                    let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
124                        this: inner_cast.this.clone(),
125                        to: DataType::Custom {
126                            name: format!("DATETIMEOFFSET({})", capped_precision),
127                        },
128                        trailing_comments: inner_cast.trailing_comments.clone(),
129                        double_colon_syntax: false,
130                        format: None,
131                        default: None,
132                        inferred_type: None,
133                    }));
134
135                    // Create new AT TIME ZONE with DATETIMEOFFSET
136                    let new_at_tz =
137                        Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
138                            this: datetimeoffset_cast,
139                            zone: at_tz.zone.clone(),
140                        }));
141
142                    // Wrap in outer DATETIME2 cast
143                    return Ok(Expression::Cast(Box::new(Cast {
144                        this: new_at_tz,
145                        to: DataType::Custom {
146                            name: format!("DATETIME2({})", capped_precision),
147                        },
148                        trailing_comments: Vec::new(),
149                        double_colon_syntax: false,
150                        format: None,
151                        default: None,
152                        inferred_type: None,
153                    })));
154                }
155            }
156        }
157
158        // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
159        // Reference: Python sqlglot Fabric dialect unixtotime_sql
160        if let Expression::UnixToTime(ref f) = expr {
161            // Build: column * 1e6
162            let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
163                left: (*f.this).clone(),
164                right: Expression::Literal(Literal::Number("1e6".to_string())),
165                left_comments: Vec::new(),
166                operator_comments: Vec::new(),
167                trailing_comments: Vec::new(),
168                inferred_type: None,
169            }));
170
171            // Build: ROUND(column * 1e6, 0)
172            let round_expr = Expression::Function(Box::new(Function::new(
173                "ROUND".to_string(),
174                vec![
175                    column_times_1e6,
176                    Expression::Literal(Literal::Number("0".to_string())),
177                ],
178            )));
179
180            // Build: CAST(ROUND(...) AS BIGINT)
181            let cast_to_bigint = Expression::Cast(Box::new(Cast {
182                this: round_expr,
183                to: DataType::BigInt { length: None },
184                trailing_comments: Vec::new(),
185                double_colon_syntax: false,
186                format: None,
187                default: None,
188                inferred_type: None,
189            }));
190
191            // Build: CAST('1970-01-01' AS DATETIME2(6))
192            let epoch_start = Expression::Cast(Box::new(Cast {
193                this: Expression::Literal(Literal::String("1970-01-01".to_string())),
194                to: DataType::Custom {
195                    name: "DATETIME2(6)".to_string(),
196                },
197                trailing_comments: Vec::new(),
198                double_colon_syntax: false,
199                format: None,
200                default: None,
201                inferred_type: None,
202            }));
203
204            // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
205            let dateadd = Expression::Function(Box::new(Function::new(
206                "DATEADD".to_string(),
207                vec![
208                    Expression::Identifier(Identifier::new("MICROSECONDS")),
209                    cast_to_bigint,
210                    epoch_start,
211                ],
212            )));
213
214            return Ok(dateadd);
215        }
216
217        // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
218        // Reference: Python sqlglot Fabric dialect unixtotime_sql
219        if let Expression::Function(ref f) = expr {
220            if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
221                let timestamp_input = f.args[0].clone();
222
223                // Build: column * 1e6
224                let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
225                    left: timestamp_input,
226                    right: Expression::Literal(Literal::Number("1e6".to_string())),
227                    left_comments: Vec::new(),
228                    operator_comments: Vec::new(),
229                    trailing_comments: Vec::new(),
230                    inferred_type: None,
231                }));
232
233                // Build: ROUND(column * 1e6, 0)
234                let round_expr = Expression::Function(Box::new(Function::new(
235                    "ROUND".to_string(),
236                    vec![
237                        column_times_1e6,
238                        Expression::Literal(Literal::Number("0".to_string())),
239                    ],
240                )));
241
242                // Build: CAST(ROUND(...) AS BIGINT)
243                let cast_to_bigint = Expression::Cast(Box::new(Cast {
244                    this: round_expr,
245                    to: DataType::BigInt { length: None },
246                    trailing_comments: Vec::new(),
247                    double_colon_syntax: false,
248                    format: None,
249                    default: None,
250                    inferred_type: None,
251                }));
252
253                // Build: CAST('1970-01-01' AS DATETIME2(6))
254                let epoch_start = Expression::Cast(Box::new(Cast {
255                    this: Expression::Literal(Literal::String("1970-01-01".to_string())),
256                    to: DataType::Custom {
257                        name: "DATETIME2(6)".to_string(),
258                    },
259                    trailing_comments: Vec::new(),
260                    double_colon_syntax: false,
261                    format: None,
262                    default: None,
263                    inferred_type: None,
264                }));
265
266                // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
267                let dateadd = Expression::Function(Box::new(Function::new(
268                    "DATEADD".to_string(),
269                    vec![
270                        Expression::Identifier(Identifier::new("MICROSECONDS")),
271                        cast_to_bigint,
272                        epoch_start,
273                    ],
274                )));
275
276                return Ok(dateadd);
277            }
278        }
279
280        // Delegate to T-SQL for other transformations
281        let tsql = TSQLDialect;
282        let transformed = tsql.transform_expr(expr)?;
283
284        // Apply Fabric-specific transformations to the result
285        self.transform_fabric_expr(transformed)
286    }
287}
288
289impl FabricDialect {
290    /// Fabric-specific expression transformations
291    fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
292        match expr {
293            // Handle DataType expressions with Fabric-specific type mappings
294            Expression::DataType(dt) => self.transform_fabric_data_type(dt),
295
296            // Pass through everything else
297            _ => Ok(expr),
298        }
299    }
300
301    /// Transform data types according to Fabric TYPE_MAPPING
302    /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
303    fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
304        let transformed = match dt {
305            // TIMESTAMP -> DATETIME2(6) with precision handling
306            // Note: TSQL already converts this to DATETIME2, but without precision
307            DataType::Timestamp { precision, .. } => {
308                let p = Self::cap_precision(precision, 6);
309                DataType::Custom {
310                    name: format!("DATETIME2({})", p),
311                }
312            }
313
314            // TIME -> TIME(6) default, capped at 6
315            DataType::Time { precision, .. } => {
316                let p = Self::cap_precision(precision, 6);
317                DataType::Custom {
318                    name: format!("TIME({})", p),
319                }
320            }
321
322            // INT -> INT (override TSQL which may output INTEGER)
323            DataType::Int { .. } => DataType::Custom {
324                name: "INT".to_string(),
325            },
326
327            // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
328            DataType::Decimal { precision, scale } => {
329                if let (Some(p), Some(s)) = (&precision, &scale) {
330                    DataType::Custom {
331                        name: format!("DECIMAL({}, {})", p, s),
332                    }
333                } else if let Some(p) = &precision {
334                    DataType::Custom {
335                        name: format!("DECIMAL({})", p),
336                    }
337                } else {
338                    DataType::Custom {
339                        name: "DECIMAL".to_string(),
340                    }
341                }
342            }
343
344            // JSON -> VARCHAR
345            DataType::Json => DataType::Custom {
346                name: "VARCHAR".to_string(),
347            },
348
349            // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
350            DataType::Uuid => DataType::Custom {
351                name: "UNIQUEIDENTIFIER".to_string(),
352            },
353
354            // TinyInt -> SMALLINT
355            DataType::TinyInt { .. } => DataType::Custom {
356                name: "SMALLINT".to_string(),
357            },
358
359            // Handle Custom types for Fabric-specific mappings
360            DataType::Custom { ref name } => {
361                let upper = name.to_uppercase();
362
363                // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
364                let (base_name, precision, scale) = Self::parse_type_precision_and_scale(&upper);
365
366                match base_name.as_str() {
367                    // DATETIME -> DATETIME2(6)
368                    "DATETIME" => DataType::Custom {
369                        name: "DATETIME2(6)".to_string(),
370                    },
371
372                    // SMALLDATETIME -> DATETIME2(6)
373                    "SMALLDATETIME" => DataType::Custom {
374                        name: "DATETIME2(6)".to_string(),
375                    },
376
377                    // DATETIME2 -> DATETIME2(6) default, cap at 6
378                    "DATETIME2" => {
379                        let p = Self::cap_precision(precision, 6);
380                        DataType::Custom {
381                            name: format!("DATETIME2({})", p),
382                        }
383                    }
384
385                    // DATETIMEOFFSET -> cap precision at 6
386                    "DATETIMEOFFSET" => {
387                        let p = Self::cap_precision(precision, 6);
388                        DataType::Custom {
389                            name: format!("DATETIMEOFFSET({})", p),
390                        }
391                    }
392
393                    // TIME -> TIME(6) default, cap at 6
394                    "TIME" => {
395                        let p = Self::cap_precision(precision, 6);
396                        DataType::Custom {
397                            name: format!("TIME({})", p),
398                        }
399                    }
400
401                    // TIMESTAMP -> DATETIME2(6)
402                    "TIMESTAMP" => DataType::Custom {
403                        name: "DATETIME2(6)".to_string(),
404                    },
405
406                    // TIMESTAMPNTZ -> DATETIME2(6) with precision
407                    "TIMESTAMPNTZ" => {
408                        let p = Self::cap_precision(precision, 6);
409                        DataType::Custom {
410                            name: format!("DATETIME2({})", p),
411                        }
412                    }
413
414                    // TIMESTAMPTZ -> DATETIME2(6) with precision
415                    "TIMESTAMPTZ" => {
416                        let p = Self::cap_precision(precision, 6);
417                        DataType::Custom {
418                            name: format!("DATETIME2({})", p),
419                        }
420                    }
421
422                    // IMAGE -> VARBINARY
423                    "IMAGE" => DataType::Custom {
424                        name: "VARBINARY".to_string(),
425                    },
426
427                    // MONEY -> DECIMAL
428                    "MONEY" => DataType::Custom {
429                        name: "DECIMAL".to_string(),
430                    },
431
432                    // SMALLMONEY -> DECIMAL
433                    "SMALLMONEY" => DataType::Custom {
434                        name: "DECIMAL".to_string(),
435                    },
436
437                    // NCHAR -> CHAR (with length preserved)
438                    "NCHAR" => {
439                        if let Some(len) = precision {
440                            DataType::Custom {
441                                name: format!("CHAR({})", len),
442                            }
443                        } else {
444                            DataType::Custom {
445                                name: "CHAR".to_string(),
446                            }
447                        }
448                    }
449
450                    // NVARCHAR -> VARCHAR (with length preserved)
451                    "NVARCHAR" => {
452                        if let Some(len) = precision {
453                            DataType::Custom {
454                                name: format!("VARCHAR({})", len),
455                            }
456                        } else {
457                            DataType::Custom {
458                                name: "VARCHAR".to_string(),
459                            }
460                        }
461                    }
462
463                    // TINYINT -> SMALLINT
464                    "TINYINT" => DataType::Custom {
465                        name: "SMALLINT".to_string(),
466                    },
467
468                    // UTINYINT -> SMALLINT
469                    "UTINYINT" => DataType::Custom {
470                        name: "SMALLINT".to_string(),
471                    },
472
473                    // VARIANT -> SQL_VARIANT
474                    "VARIANT" => DataType::Custom {
475                        name: "SQL_VARIANT".to_string(),
476                    },
477
478                    // XML -> VARCHAR
479                    "XML" => DataType::Custom {
480                        name: "VARCHAR".to_string(),
481                    },
482
483                    // NUMERIC -> DECIMAL (override TSQL's conversion)
484                    // Fabric uses DECIMAL, not NUMERIC
485                    "NUMERIC" => {
486                        if let (Some(p), Some(s)) = (precision, scale) {
487                            DataType::Custom {
488                                name: format!("DECIMAL({}, {})", p, s),
489                            }
490                        } else if let Some(p) = precision {
491                            DataType::Custom {
492                                name: format!("DECIMAL({})", p),
493                            }
494                        } else {
495                            DataType::Custom {
496                                name: "DECIMAL".to_string(),
497                            }
498                        }
499                    }
500
501                    // Pass through other custom types unchanged
502                    _ => dt,
503                }
504            }
505
506            // Keep all other types as transformed by TSQL
507            other => other,
508        };
509
510        Ok(Expression::DataType(transformed))
511    }
512
513    /// Cap precision to max value, defaulting to max if not specified
514    fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
515        match precision {
516            Some(p) if p > max => max,
517            Some(p) => p,
518            None => max, // Default to max if not specified
519        }
520    }
521
522    /// Parse type name and optional precision/scale from strings like "DATETIME2(7)" or "NUMERIC(10, 2)"
523    fn parse_type_precision_and_scale(name: &str) -> (String, Option<u32>, Option<u32>) {
524        if let Some(paren_pos) = name.find('(') {
525            let base = name[..paren_pos].to_string();
526            let rest = &name[paren_pos + 1..];
527            if let Some(close_pos) = rest.find(')') {
528                let args = &rest[..close_pos];
529                let parts: Vec<&str> = args.split(',').map(|s| s.trim()).collect();
530
531                let precision = parts.first().and_then(|s| s.parse::<u32>().ok());
532                let scale = parts.get(1).and_then(|s| s.parse::<u32>().ok());
533
534                return (base, precision, scale);
535            }
536            (base, None, None)
537        } else {
538            (name.to_string(), None, None)
539        }
540    }
541}