Skip to main content

polyglot_sql/dialects/
fabric.rs

1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
23/// Microsoft Fabric Data Warehouse dialect (based on T-SQL)
24pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
27    fn dialect_type(&self) -> DialectType {
28        DialectType::Fabric
29    }
30
31    fn tokenizer_config(&self) -> TokenizerConfig {
32        // Inherit from T-SQL
33        let tsql = TSQLDialect;
34        tsql.tokenizer_config()
35    }
36
37    fn generator_config(&self) -> GeneratorConfig {
38        use crate::generator::IdentifierQuoteStyle;
39        // Inherit from T-SQL with Fabric dialect type
40        GeneratorConfig {
41            // Use square brackets like T-SQL
42            identifier_quote: '[',
43            identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44            dialect: Some(DialectType::Fabric),
45            ..Default::default()
46        }
47    }
48
49    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50        // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
51        // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
52        if let Expression::CreateTable(mut ct) = expr {
53            for col in &mut ct.columns {
54                match &col.data_type {
55                    DataType::VarChar { length: None, .. } => {
56                        col.data_type = DataType::VarChar {
57                            length: Some(1),
58                            parenthesized_length: false,
59                        };
60                    }
61                    DataType::Char { length: None } => {
62                        col.data_type = DataType::Char { length: Some(1) };
63                    }
64                    _ => {}
65                }
66                // Also transform column data types through Fabric's type mappings
67                if let Expression::DataType(new_dt) =
68                    self.transform_fabric_data_type(col.data_type.clone())?
69                {
70                    col.data_type = new_dt;
71                }
72            }
73            return Ok(Expression::CreateTable(ct));
74        }
75
76        // Handle DataType::Timestamp specially BEFORE T-SQL transform
77        // because TSQL loses precision info when converting Timestamp to DATETIME2
78        if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
79            let p = FabricDialect::cap_precision(*precision, 6);
80            return Ok(Expression::DataType(DataType::Custom {
81                name: format!("DATETIME2({})", p),
82            }));
83        }
84
85        // Handle DataType::Time specially BEFORE T-SQL transform
86        // to ensure we get default precision of 6
87        if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
88            let p = FabricDialect::cap_precision(*precision, 6);
89            return Ok(Expression::DataType(DataType::Custom {
90                name: format!("TIME({})", p),
91            }));
92        }
93
94        // Handle DataType::Decimal specially BEFORE T-SQL transform
95        // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
96        if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
97            let name = if let (Some(p), Some(s)) = (precision, scale) {
98                format!("DECIMAL({}, {})", p, s)
99            } else if let Some(p) = precision {
100                format!("DECIMAL({})", p)
101            } else {
102                "DECIMAL".to_string()
103            };
104            return Ok(Expression::DataType(DataType::Custom { name }));
105        }
106
107        // Handle AT TIME ZONE with TIMESTAMPTZ cast
108        // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
109        // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
110        // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
111        if let Expression::AtTimeZone(ref at_tz) = expr {
112            // Check if this contains a TIMESTAMPTZ cast
113            if let Expression::Cast(ref inner_cast) = at_tz.this {
114                if let DataType::Timestamp {
115                    timezone: true,
116                    precision,
117                } = &inner_cast.to
118                {
119                    // Get precision, default 6, cap at 6
120                    let capped_precision = FabricDialect::cap_precision(*precision, 6);
121
122                    // Create inner DATETIMEOFFSET cast
123                    let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
124                        this: inner_cast.this.clone(),
125                        to: DataType::Custom {
126                            name: format!("DATETIMEOFFSET({})", capped_precision),
127                        },
128                        trailing_comments: inner_cast.trailing_comments.clone(),
129                        double_colon_syntax: false,
130                        format: None,
131                        default: None,
132                    }));
133
134                    // Create new AT TIME ZONE with DATETIMEOFFSET
135                    let new_at_tz =
136                        Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
137                            this: datetimeoffset_cast,
138                            zone: at_tz.zone.clone(),
139                        }));
140
141                    // Wrap in outer DATETIME2 cast
142                    return Ok(Expression::Cast(Box::new(Cast {
143                        this: new_at_tz,
144                        to: DataType::Custom {
145                            name: format!("DATETIME2({})", capped_precision),
146                        },
147                        trailing_comments: Vec::new(),
148                        double_colon_syntax: false,
149                        format: None,
150                        default: None,
151                    })));
152                }
153            }
154        }
155
156        // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
157        // Reference: Python sqlglot Fabric dialect unixtotime_sql
158        if let Expression::UnixToTime(ref f) = expr {
159            // Build: column * 1e6
160            let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
161                left: (*f.this).clone(),
162                right: Expression::Literal(Literal::Number("1e6".to_string())),
163                left_comments: Vec::new(),
164                operator_comments: Vec::new(),
165                trailing_comments: Vec::new(),
166            }));
167
168            // Build: ROUND(column * 1e6, 0)
169            let round_expr = Expression::Function(Box::new(Function::new(
170                "ROUND".to_string(),
171                vec![
172                    column_times_1e6,
173                    Expression::Literal(Literal::Number("0".to_string())),
174                ],
175            )));
176
177            // Build: CAST(ROUND(...) AS BIGINT)
178            let cast_to_bigint = Expression::Cast(Box::new(Cast {
179                this: round_expr,
180                to: DataType::BigInt { length: None },
181                trailing_comments: Vec::new(),
182                double_colon_syntax: false,
183                format: None,
184                default: None,
185            }));
186
187            // Build: CAST('1970-01-01' AS DATETIME2(6))
188            let epoch_start = Expression::Cast(Box::new(Cast {
189                this: Expression::Literal(Literal::String("1970-01-01".to_string())),
190                to: DataType::Custom {
191                    name: "DATETIME2(6)".to_string(),
192                },
193                trailing_comments: Vec::new(),
194                double_colon_syntax: false,
195                format: None,
196                default: None,
197            }));
198
199            // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
200            let dateadd = Expression::Function(Box::new(Function::new(
201                "DATEADD".to_string(),
202                vec![
203                    Expression::Identifier(Identifier::new("MICROSECONDS")),
204                    cast_to_bigint,
205                    epoch_start,
206                ],
207            )));
208
209            return Ok(dateadd);
210        }
211
212        // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
213        // Reference: Python sqlglot Fabric dialect unixtotime_sql
214        if let Expression::Function(ref f) = expr {
215            if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
216                let timestamp_input = f.args[0].clone();
217
218                // Build: column * 1e6
219                let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
220                    left: timestamp_input,
221                    right: Expression::Literal(Literal::Number("1e6".to_string())),
222                    left_comments: Vec::new(),
223                    operator_comments: Vec::new(),
224                    trailing_comments: Vec::new(),
225                }));
226
227                // Build: ROUND(column * 1e6, 0)
228                let round_expr = Expression::Function(Box::new(Function::new(
229                    "ROUND".to_string(),
230                    vec![
231                        column_times_1e6,
232                        Expression::Literal(Literal::Number("0".to_string())),
233                    ],
234                )));
235
236                // Build: CAST(ROUND(...) AS BIGINT)
237                let cast_to_bigint = Expression::Cast(Box::new(Cast {
238                    this: round_expr,
239                    to: DataType::BigInt { length: None },
240                    trailing_comments: Vec::new(),
241                    double_colon_syntax: false,
242                    format: None,
243                    default: None,
244                }));
245
246                // Build: CAST('1970-01-01' AS DATETIME2(6))
247                let epoch_start = Expression::Cast(Box::new(Cast {
248                    this: Expression::Literal(Literal::String("1970-01-01".to_string())),
249                    to: DataType::Custom {
250                        name: "DATETIME2(6)".to_string(),
251                    },
252                    trailing_comments: Vec::new(),
253                    double_colon_syntax: false,
254                    format: None,
255                    default: None,
256                }));
257
258                // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
259                let dateadd = Expression::Function(Box::new(Function::new(
260                    "DATEADD".to_string(),
261                    vec![
262                        Expression::Identifier(Identifier::new("MICROSECONDS")),
263                        cast_to_bigint,
264                        epoch_start,
265                    ],
266                )));
267
268                return Ok(dateadd);
269            }
270        }
271
272        // Delegate to T-SQL for other transformations
273        let tsql = TSQLDialect;
274        let transformed = tsql.transform_expr(expr)?;
275
276        // Apply Fabric-specific transformations to the result
277        self.transform_fabric_expr(transformed)
278    }
279}
280
281impl FabricDialect {
282    /// Fabric-specific expression transformations
283    fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
284        match expr {
285            // Handle DataType expressions with Fabric-specific type mappings
286            Expression::DataType(dt) => self.transform_fabric_data_type(dt),
287
288            // Pass through everything else
289            _ => Ok(expr),
290        }
291    }
292
293    /// Transform data types according to Fabric TYPE_MAPPING
294    /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
295    fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
296        let transformed = match dt {
297            // TIMESTAMP -> DATETIME2(6) with precision handling
298            // Note: TSQL already converts this to DATETIME2, but without precision
299            DataType::Timestamp { precision, .. } => {
300                let p = Self::cap_precision(precision, 6);
301                DataType::Custom {
302                    name: format!("DATETIME2({})", p),
303                }
304            }
305
306            // TIME -> TIME(6) default, capped at 6
307            DataType::Time { precision, .. } => {
308                let p = Self::cap_precision(precision, 6);
309                DataType::Custom {
310                    name: format!("TIME({})", p),
311                }
312            }
313
314            // INT -> INT (override TSQL which may output INTEGER)
315            DataType::Int { .. } => DataType::Custom {
316                name: "INT".to_string(),
317            },
318
319            // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
320            DataType::Decimal { precision, scale } => {
321                if let (Some(p), Some(s)) = (&precision, &scale) {
322                    DataType::Custom {
323                        name: format!("DECIMAL({}, {})", p, s),
324                    }
325                } else if let Some(p) = &precision {
326                    DataType::Custom {
327                        name: format!("DECIMAL({})", p),
328                    }
329                } else {
330                    DataType::Custom {
331                        name: "DECIMAL".to_string(),
332                    }
333                }
334            }
335
336            // JSON -> VARCHAR
337            DataType::Json => DataType::Custom {
338                name: "VARCHAR".to_string(),
339            },
340
341            // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
342            DataType::Uuid => DataType::Custom {
343                name: "UNIQUEIDENTIFIER".to_string(),
344            },
345
346            // TinyInt -> SMALLINT
347            DataType::TinyInt { .. } => DataType::Custom {
348                name: "SMALLINT".to_string(),
349            },
350
351            // Handle Custom types for Fabric-specific mappings
352            DataType::Custom { ref name } => {
353                let upper = name.to_uppercase();
354
355                // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
356                let (base_name, precision, scale) = Self::parse_type_precision_and_scale(&upper);
357
358                match base_name.as_str() {
359                    // DATETIME -> DATETIME2(6)
360                    "DATETIME" => DataType::Custom {
361                        name: "DATETIME2(6)".to_string(),
362                    },
363
364                    // SMALLDATETIME -> DATETIME2(6)
365                    "SMALLDATETIME" => DataType::Custom {
366                        name: "DATETIME2(6)".to_string(),
367                    },
368
369                    // DATETIME2 -> DATETIME2(6) default, cap at 6
370                    "DATETIME2" => {
371                        let p = Self::cap_precision(precision, 6);
372                        DataType::Custom {
373                            name: format!("DATETIME2({})", p),
374                        }
375                    }
376
377                    // DATETIMEOFFSET -> cap precision at 6
378                    "DATETIMEOFFSET" => {
379                        let p = Self::cap_precision(precision, 6);
380                        DataType::Custom {
381                            name: format!("DATETIMEOFFSET({})", p),
382                        }
383                    }
384
385                    // TIME -> TIME(6) default, cap at 6
386                    "TIME" => {
387                        let p = Self::cap_precision(precision, 6);
388                        DataType::Custom {
389                            name: format!("TIME({})", p),
390                        }
391                    }
392
393                    // TIMESTAMP -> DATETIME2(6)
394                    "TIMESTAMP" => DataType::Custom {
395                        name: "DATETIME2(6)".to_string(),
396                    },
397
398                    // TIMESTAMPNTZ -> DATETIME2(6) with precision
399                    "TIMESTAMPNTZ" => {
400                        let p = Self::cap_precision(precision, 6);
401                        DataType::Custom {
402                            name: format!("DATETIME2({})", p),
403                        }
404                    }
405
406                    // TIMESTAMPTZ -> DATETIME2(6) with precision
407                    "TIMESTAMPTZ" => {
408                        let p = Self::cap_precision(precision, 6);
409                        DataType::Custom {
410                            name: format!("DATETIME2({})", p),
411                        }
412                    }
413
414                    // IMAGE -> VARBINARY
415                    "IMAGE" => DataType::Custom {
416                        name: "VARBINARY".to_string(),
417                    },
418
419                    // MONEY -> DECIMAL
420                    "MONEY" => DataType::Custom {
421                        name: "DECIMAL".to_string(),
422                    },
423
424                    // SMALLMONEY -> DECIMAL
425                    "SMALLMONEY" => DataType::Custom {
426                        name: "DECIMAL".to_string(),
427                    },
428
429                    // NCHAR -> CHAR (with length preserved)
430                    "NCHAR" => {
431                        if let Some(len) = precision {
432                            DataType::Custom {
433                                name: format!("CHAR({})", len),
434                            }
435                        } else {
436                            DataType::Custom {
437                                name: "CHAR".to_string(),
438                            }
439                        }
440                    }
441
442                    // NVARCHAR -> VARCHAR (with length preserved)
443                    "NVARCHAR" => {
444                        if let Some(len) = precision {
445                            DataType::Custom {
446                                name: format!("VARCHAR({})", len),
447                            }
448                        } else {
449                            DataType::Custom {
450                                name: "VARCHAR".to_string(),
451                            }
452                        }
453                    }
454
455                    // TINYINT -> SMALLINT
456                    "TINYINT" => DataType::Custom {
457                        name: "SMALLINT".to_string(),
458                    },
459
460                    // UTINYINT -> SMALLINT
461                    "UTINYINT" => DataType::Custom {
462                        name: "SMALLINT".to_string(),
463                    },
464
465                    // VARIANT -> SQL_VARIANT
466                    "VARIANT" => DataType::Custom {
467                        name: "SQL_VARIANT".to_string(),
468                    },
469
470                    // XML -> VARCHAR
471                    "XML" => DataType::Custom {
472                        name: "VARCHAR".to_string(),
473                    },
474
475                    // NUMERIC -> DECIMAL (override TSQL's conversion)
476                    // Fabric uses DECIMAL, not NUMERIC
477                    "NUMERIC" => {
478                        if let (Some(p), Some(s)) = (precision, scale) {
479                            DataType::Custom {
480                                name: format!("DECIMAL({}, {})", p, s),
481                            }
482                        } else if let Some(p) = precision {
483                            DataType::Custom {
484                                name: format!("DECIMAL({})", p),
485                            }
486                        } else {
487                            DataType::Custom {
488                                name: "DECIMAL".to_string(),
489                            }
490                        }
491                    }
492
493                    // Pass through other custom types unchanged
494                    _ => dt,
495                }
496            }
497
498            // Keep all other types as transformed by TSQL
499            other => other,
500        };
501
502        Ok(Expression::DataType(transformed))
503    }
504
505    /// Cap precision to max value, defaulting to max if not specified
506    fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
507        match precision {
508            Some(p) if p > max => max,
509            Some(p) => p,
510            None => max, // Default to max if not specified
511        }
512    }
513
514    /// Parse type name and optional precision/scale from strings like "DATETIME2(7)" or "NUMERIC(10, 2)"
515    fn parse_type_precision_and_scale(name: &str) -> (String, Option<u32>, Option<u32>) {
516        if let Some(paren_pos) = name.find('(') {
517            let base = name[..paren_pos].to_string();
518            let rest = &name[paren_pos + 1..];
519            if let Some(close_pos) = rest.find(')') {
520                let args = &rest[..close_pos];
521                let parts: Vec<&str> = args.split(',').map(|s| s.trim()).collect();
522
523                let precision = parts.first().and_then(|s| s.parse::<u32>().ok());
524                let scale = parts.get(1).and_then(|s| s.parse::<u32>().ok());
525
526                return (base, precision, scale);
527            }
528            (base, None, None)
529        } else {
530            (name.to_string(), None, None)
531        }
532    }
533}