Skip to main content

polyglot_sql/dialects/
fabric.rs

1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20#[cfg(feature = "generate")]
21use crate::generator::GeneratorConfig;
22use crate::tokens::TokenizerConfig;
23
/// Microsoft Fabric Data Warehouse dialect (based on T-SQL).
///
/// This is a stateless unit type: all behaviour lives in its `DialectImpl`
/// implementation and its inherent helper methods. The common traits are derived
/// so the value can be logged, copied, compared and default-constructed.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FabricDialect;
26
27impl DialectImpl for FabricDialect {
28    fn dialect_type(&self) -> DialectType {
29        DialectType::Fabric
30    }
31
32    fn tokenizer_config(&self) -> TokenizerConfig {
33        // Inherit from T-SQL
34        let tsql = TSQLDialect;
35        tsql.tokenizer_config()
36    }
37
38    #[cfg(feature = "generate")]
39
40    fn generator_config(&self) -> GeneratorConfig {
41        use crate::generator::IdentifierQuoteStyle;
42        // Inherit from T-SQL with Fabric dialect type
43        GeneratorConfig {
44            // Use square brackets like T-SQL
45            identifier_quote: '[',
46            identifier_quote_style: IdentifierQuoteStyle::BRACKET,
47            dialect: Some(DialectType::Fabric),
48            null_ordering_supported: false,
49            ..Default::default()
50        }
51    }
52
53    #[cfg(feature = "transpile")]
54
55    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
56        // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
57        // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
58        if let Expression::CreateTable(mut ct) = expr {
59            for col in &mut ct.columns {
60                match &col.data_type {
61                    DataType::VarChar { length: None, .. } => {
62                        col.data_type = DataType::VarChar {
63                            length: Some(1),
64                            parenthesized_length: false,
65                        };
66                    }
67                    DataType::Char { length: None } => {
68                        col.data_type = DataType::Char { length: Some(1) };
69                    }
70                    _ => {}
71                }
72                // Also transform column data types through Fabric's type mappings.
73                // Apply TSQL normalisation first (e.g. BPCHAR → Char), then Fabric-specific.
74                let tsql = TSQLDialect;
75                if let Ok(Expression::DataType(tsql_dt)) =
76                    tsql.transform_data_type(col.data_type.clone())
77                {
78                    col.data_type = tsql_dt;
79                }
80                if let Expression::DataType(new_dt) =
81                    self.transform_fabric_data_type(col.data_type.clone())?
82                {
83                    col.data_type = new_dt;
84                }
85            }
86            return Ok(Expression::CreateTable(ct));
87        }
88
89        // Handle DataType::Timestamp specially BEFORE T-SQL transform
90        // because TSQL loses precision info when converting Timestamp to DATETIME2
91        if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
92            let p = FabricDialect::cap_precision(*precision, 6);
93            return Ok(Expression::DataType(DataType::Custom {
94                name: format!("DATETIME2({})", p),
95            }));
96        }
97
98        // Handle DataType::Time specially BEFORE T-SQL transform
99        // to ensure we get default precision of 6
100        if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
101            let p = FabricDialect::cap_precision(*precision, 6);
102            return Ok(Expression::DataType(DataType::Custom {
103                name: format!("TIME({})", p),
104            }));
105        }
106
107        // Handle DataType::Decimal specially BEFORE T-SQL transform
108        // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
109        if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
110            let name = if let (Some(p), Some(s)) = (precision, scale) {
111                format!("DECIMAL({}, {})", p, s)
112            } else if let Some(p) = precision {
113                format!("DECIMAL({})", p)
114            } else {
115                "DECIMAL".to_string()
116            };
117            return Ok(Expression::DataType(DataType::Custom { name }));
118        }
119
120        // Handle AT TIME ZONE with TIMESTAMPTZ cast
121        // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
122        // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
123        // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
124        if let Expression::AtTimeZone(ref at_tz) = expr {
125            // Check if this contains a TIMESTAMPTZ cast
126            if let Expression::Cast(ref inner_cast) = at_tz.this {
127                if let DataType::Timestamp {
128                    timezone: true,
129                    precision,
130                } = &inner_cast.to
131                {
132                    // Get precision, default 6, cap at 6
133                    let capped_precision = FabricDialect::cap_precision(*precision, 6);
134
135                    // Create inner DATETIMEOFFSET cast
136                    let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
137                        this: inner_cast.this.clone(),
138                        to: DataType::Custom {
139                            name: format!("DATETIMEOFFSET({})", capped_precision),
140                        },
141                        trailing_comments: inner_cast.trailing_comments.clone(),
142                        double_colon_syntax: false,
143                        format: None,
144                        default: None,
145                        inferred_type: None,
146                    }));
147
148                    // Create new AT TIME ZONE with DATETIMEOFFSET
149                    let new_at_tz =
150                        Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
151                            this: datetimeoffset_cast,
152                            zone: at_tz.zone.clone(),
153                        }));
154
155                    // Wrap in outer DATETIME2 cast
156                    return Ok(Expression::Cast(Box::new(Cast {
157                        this: new_at_tz,
158                        to: DataType::Custom {
159                            name: format!("DATETIME2({})", capped_precision),
160                        },
161                        trailing_comments: Vec::new(),
162                        double_colon_syntax: false,
163                        format: None,
164                        default: None,
165                        inferred_type: None,
166                    })));
167                }
168            }
169        }
170
171        // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
172        // Reference: Python sqlglot Fabric dialect unixtotime_sql
173        if let Expression::UnixToTime(ref f) = expr {
174            // Build: column * 1e6
175            let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
176                left: (*f.this).clone(),
177                right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
178                left_comments: Vec::new(),
179                operator_comments: Vec::new(),
180                trailing_comments: Vec::new(),
181                inferred_type: None,
182            }));
183
184            // Build: ROUND(column * 1e6, 0)
185            let round_expr = Expression::Function(Box::new(Function::new(
186                "ROUND".to_string(),
187                vec![
188                    column_times_1e6,
189                    Expression::Literal(Box::new(Literal::Number("0".to_string()))),
190                ],
191            )));
192
193            // Build: CAST(ROUND(...) AS BIGINT)
194            let cast_to_bigint = Expression::Cast(Box::new(Cast {
195                this: round_expr,
196                to: DataType::BigInt { length: None },
197                trailing_comments: Vec::new(),
198                double_colon_syntax: false,
199                format: None,
200                default: None,
201                inferred_type: None,
202            }));
203
204            // Build: CAST('1970-01-01' AS DATETIME2(6))
205            let epoch_start = Expression::Cast(Box::new(Cast {
206                this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
207                to: DataType::Custom {
208                    name: "DATETIME2(6)".to_string(),
209                },
210                trailing_comments: Vec::new(),
211                double_colon_syntax: false,
212                format: None,
213                default: None,
214                inferred_type: None,
215            }));
216
217            // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
218            let dateadd = Expression::Function(Box::new(Function::new(
219                "DATEADD".to_string(),
220                vec![
221                    Expression::Identifier(Identifier::new("MICROSECONDS")),
222                    cast_to_bigint,
223                    epoch_start,
224                ],
225            )));
226
227            return Ok(dateadd);
228        }
229
230        // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
231        // Reference: Python sqlglot Fabric dialect unixtotime_sql
232        if let Expression::Function(ref f) = expr {
233            if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
234                let timestamp_input = f.args[0].clone();
235
236                // Build: column * 1e6
237                let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
238                    left: timestamp_input,
239                    right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
240                    left_comments: Vec::new(),
241                    operator_comments: Vec::new(),
242                    trailing_comments: Vec::new(),
243                    inferred_type: None,
244                }));
245
246                // Build: ROUND(column * 1e6, 0)
247                let round_expr = Expression::Function(Box::new(Function::new(
248                    "ROUND".to_string(),
249                    vec![
250                        column_times_1e6,
251                        Expression::Literal(Box::new(Literal::Number("0".to_string()))),
252                    ],
253                )));
254
255                // Build: CAST(ROUND(...) AS BIGINT)
256                let cast_to_bigint = Expression::Cast(Box::new(Cast {
257                    this: round_expr,
258                    to: DataType::BigInt { length: None },
259                    trailing_comments: Vec::new(),
260                    double_colon_syntax: false,
261                    format: None,
262                    default: None,
263                    inferred_type: None,
264                }));
265
266                // Build: CAST('1970-01-01' AS DATETIME2(6))
267                let epoch_start = Expression::Cast(Box::new(Cast {
268                    this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
269                    to: DataType::Custom {
270                        name: "DATETIME2(6)".to_string(),
271                    },
272                    trailing_comments: Vec::new(),
273                    double_colon_syntax: false,
274                    format: None,
275                    default: None,
276                    inferred_type: None,
277                }));
278
279                // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
280                let dateadd = Expression::Function(Box::new(Function::new(
281                    "DATEADD".to_string(),
282                    vec![
283                        Expression::Identifier(Identifier::new("MICROSECONDS")),
284                        cast_to_bigint,
285                        epoch_start,
286                    ],
287                )));
288
289                return Ok(dateadd);
290            }
291        }
292
293        // Delegate to T-SQL for other transformations
294        let tsql = TSQLDialect;
295        let transformed = tsql.transform_expr(expr)?;
296
297        // Apply Fabric-specific transformations to the result
298        self.transform_fabric_expr(transformed)
299    }
300}
301
302#[cfg(feature = "transpile")]
303impl FabricDialect {
304    /// Fabric-specific expression transformations
305    fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
306        match expr {
307            // Handle DataType expressions with Fabric-specific type mappings
308            Expression::DataType(dt) => self.transform_fabric_data_type(dt),
309
310            // Pass through everything else
311            _ => Ok(expr),
312        }
313    }
314
315    /// Transform data types according to Fabric TYPE_MAPPING
316    /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
317    fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
318        let transformed = match dt {
319            // TIMESTAMP -> DATETIME2(6) with precision handling
320            // Note: TSQL already converts this to DATETIME2, but without precision
321            DataType::Timestamp { precision, .. } => {
322                let p = Self::cap_precision(precision, 6);
323                DataType::Custom {
324                    name: format!("DATETIME2({})", p),
325                }
326            }
327
328            // TIME -> TIME(6) default, capped at 6
329            DataType::Time { precision, .. } => {
330                let p = Self::cap_precision(precision, 6);
331                DataType::Custom {
332                    name: format!("TIME({})", p),
333                }
334            }
335
336            // INT -> INT (override TSQL which may output INTEGER)
337            DataType::Int { .. } => DataType::Custom {
338                name: "INT".to_string(),
339            },
340
341            // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
342            DataType::Decimal { precision, scale } => {
343                if let (Some(p), Some(s)) = (&precision, &scale) {
344                    DataType::Custom {
345                        name: format!("DECIMAL({}, {})", p, s),
346                    }
347                } else if let Some(p) = &precision {
348                    DataType::Custom {
349                        name: format!("DECIMAL({})", p),
350                    }
351                } else {
352                    DataType::Custom {
353                        name: "DECIMAL".to_string(),
354                    }
355                }
356            }
357
358            // JSON -> VARCHAR
359            DataType::Json => DataType::Custom {
360                name: "VARCHAR".to_string(),
361            },
362
363            // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
364            DataType::Uuid => DataType::Custom {
365                name: "UNIQUEIDENTIFIER".to_string(),
366            },
367
368            // TinyInt -> SMALLINT
369            DataType::TinyInt { .. } => DataType::Custom {
370                name: "SMALLINT".to_string(),
371            },
372
373            // Handle Custom types for Fabric-specific mappings
374            DataType::Custom { ref name } => {
375                let upper = name.to_uppercase();
376
377                // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
378                let (base_name, precision, scale) =
379                    TSQLDialect::parse_type_precision_and_scale(&upper);
380                let has_max_length = upper.contains("(MAX)");
381
382                match base_name.as_str() {
383                    // DATETIME -> DATETIME2(6)
384                    "DATETIME" => DataType::Custom {
385                        name: "DATETIME2(6)".to_string(),
386                    },
387
388                    // SMALLDATETIME -> DATETIME2(6)
389                    "SMALLDATETIME" => DataType::Custom {
390                        name: "DATETIME2(6)".to_string(),
391                    },
392
393                    // DATETIME2 -> DATETIME2(6) default, cap at 6
394                    "DATETIME2" => {
395                        let p = Self::cap_precision(precision, 6);
396                        DataType::Custom {
397                            name: format!("DATETIME2({})", p),
398                        }
399                    }
400
401                    // DATETIMEOFFSET -> cap precision at 6
402                    "DATETIMEOFFSET" => {
403                        let p = Self::cap_precision(precision, 6);
404                        DataType::Custom {
405                            name: format!("DATETIMEOFFSET({})", p),
406                        }
407                    }
408
409                    // TIME -> TIME(6) default, cap at 6
410                    "TIME" => {
411                        let p = Self::cap_precision(precision, 6);
412                        DataType::Custom {
413                            name: format!("TIME({})", p),
414                        }
415                    }
416
417                    // TIMESTAMP -> DATETIME2(6)
418                    "TIMESTAMP" => DataType::Custom {
419                        name: "DATETIME2(6)".to_string(),
420                    },
421
422                    // TIMESTAMPNTZ -> DATETIME2(6) with precision
423                    "TIMESTAMPNTZ" => {
424                        let p = Self::cap_precision(precision, 6);
425                        DataType::Custom {
426                            name: format!("DATETIME2({})", p),
427                        }
428                    }
429
430                    // TIMESTAMPTZ -> DATETIME2(6) with precision
431                    "TIMESTAMPTZ" => {
432                        let p = Self::cap_precision(precision, 6);
433                        DataType::Custom {
434                            name: format!("DATETIME2({})", p),
435                        }
436                    }
437
438                    // IMAGE -> VARBINARY
439                    "IMAGE" => DataType::Custom {
440                        name: "VARBINARY".to_string(),
441                    },
442
443                    // MONEY -> DECIMAL
444                    "MONEY" => DataType::Custom {
445                        name: "DECIMAL".to_string(),
446                    },
447
448                    // SMALLMONEY -> DECIMAL
449                    "SMALLMONEY" => DataType::Custom {
450                        name: "DECIMAL".to_string(),
451                    },
452
453                    // NCHAR -> CHAR (with length preserved)
454                    "NCHAR" => {
455                        if has_max_length {
456                            DataType::Custom {
457                                name: "CHAR(MAX)".to_string(),
458                            }
459                        } else if let Some(len) = precision {
460                            DataType::Custom {
461                                name: format!("CHAR({})", len),
462                            }
463                        } else {
464                            DataType::Custom {
465                                name: "CHAR".to_string(),
466                            }
467                        }
468                    }
469
470                    // NVARCHAR -> VARCHAR (with length preserved)
471                    "NVARCHAR" => {
472                        if has_max_length {
473                            DataType::Custom {
474                                name: "VARCHAR(MAX)".to_string(),
475                            }
476                        } else if let Some(len) = precision {
477                            DataType::Custom {
478                                name: format!("VARCHAR({})", len),
479                            }
480                        } else {
481                            DataType::Custom {
482                                name: "VARCHAR".to_string(),
483                            }
484                        }
485                    }
486
487                    // TINYINT -> SMALLINT
488                    "TINYINT" => DataType::Custom {
489                        name: "SMALLINT".to_string(),
490                    },
491
492                    // UTINYINT -> SMALLINT
493                    "UTINYINT" => DataType::Custom {
494                        name: "SMALLINT".to_string(),
495                    },
496
497                    // VARIANT -> SQL_VARIANT
498                    "VARIANT" => DataType::Custom {
499                        name: "SQL_VARIANT".to_string(),
500                    },
501
502                    // XML -> VARCHAR
503                    "XML" => DataType::Custom {
504                        name: "VARCHAR".to_string(),
505                    },
506
507                    // NUMERIC -> DECIMAL (override TSQL's conversion)
508                    // Fabric uses DECIMAL, not NUMERIC
509                    "NUMERIC" => {
510                        if let (Some(p), Some(s)) = (precision, scale) {
511                            DataType::Custom {
512                                name: format!("DECIMAL({}, {})", p, s),
513                            }
514                        } else if let Some(p) = precision {
515                            DataType::Custom {
516                                name: format!("DECIMAL({})", p),
517                            }
518                        } else {
519                            DataType::Custom {
520                                name: "DECIMAL".to_string(),
521                            }
522                        }
523                    }
524
525                    // Pass through other custom types unchanged
526                    _ => dt,
527                }
528            }
529
530            // Keep all other types as transformed by TSQL
531            other => other,
532        };
533
534        Ok(Expression::DataType(transformed))
535    }
536
537    /// Cap precision to max value, defaulting to max if not specified
538    fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
539        match precision {
540            Some(p) if p > max => max,
541            Some(p) => p,
542            None => max, // Default to max if not specified
543        }
544    }
545}