polyglot_sql/dialects/fabric.rs
1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) are limited to 6 digits of fractional-second precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20#[cfg(feature = "generate")]
21use crate::generator::GeneratorConfig;
22use crate::tokens::TokenizerConfig;
23
/// Dialect marker for Microsoft Fabric Data Warehouse.
///
/// A field-less unit struct: all behaviour lives in its `DialectImpl`
/// implementation, which delegates to `TSQLDialect` wherever Fabric does not
/// need its own handling.
pub struct FabricDialect;
26
27impl DialectImpl for FabricDialect {
28 fn dialect_type(&self) -> DialectType {
29 DialectType::Fabric
30 }
31
32 fn tokenizer_config(&self) -> TokenizerConfig {
33 // Inherit from T-SQL
34 let tsql = TSQLDialect;
35 tsql.tokenizer_config()
36 }
37
38 #[cfg(feature = "generate")]
39
40 fn generator_config(&self) -> GeneratorConfig {
41 use crate::generator::IdentifierQuoteStyle;
42 // Inherit from T-SQL with Fabric dialect type
43 GeneratorConfig {
44 // Use square brackets like T-SQL
45 identifier_quote: '[',
46 identifier_quote_style: IdentifierQuoteStyle::BRACKET,
47 dialect: Some(DialectType::Fabric),
48 null_ordering_supported: false,
49 ..Default::default()
50 }
51 }
52
53 #[cfg(feature = "transpile")]
54
55 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
56 // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
57 // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
58 if let Expression::CreateTable(mut ct) = expr {
59 for col in &mut ct.columns {
60 match &col.data_type {
61 DataType::VarChar { length: None, .. } => {
62 col.data_type = DataType::VarChar {
63 length: Some(1),
64 parenthesized_length: false,
65 };
66 }
67 DataType::Char { length: None } => {
68 col.data_type = DataType::Char { length: Some(1) };
69 }
70 _ => {}
71 }
72 // Also transform column data types through Fabric's type mappings.
73 // Apply TSQL normalisation first (e.g. BPCHAR → Char), then Fabric-specific.
74 let tsql = TSQLDialect;
75 if let Ok(Expression::DataType(tsql_dt)) =
76 tsql.transform_data_type(col.data_type.clone())
77 {
78 col.data_type = tsql_dt;
79 }
80 if let Expression::DataType(new_dt) =
81 self.transform_fabric_data_type(col.data_type.clone())?
82 {
83 col.data_type = new_dt;
84 }
85 }
86 return Ok(Expression::CreateTable(ct));
87 }
88
89 // Handle DataType::Timestamp specially BEFORE T-SQL transform
90 // because TSQL loses precision info when converting Timestamp to DATETIME2
91 if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
92 let p = FabricDialect::cap_precision(*precision, 6);
93 return Ok(Expression::DataType(DataType::Custom {
94 name: format!("DATETIME2({})", p),
95 }));
96 }
97
98 // Handle DataType::Time specially BEFORE T-SQL transform
99 // to ensure we get default precision of 6
100 if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
101 let p = FabricDialect::cap_precision(*precision, 6);
102 return Ok(Expression::DataType(DataType::Custom {
103 name: format!("TIME({})", p),
104 }));
105 }
106
107 // Handle DataType::Decimal specially BEFORE T-SQL transform
108 // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
109 if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
110 let name = if let (Some(p), Some(s)) = (precision, scale) {
111 format!("DECIMAL({}, {})", p, s)
112 } else if let Some(p) = precision {
113 format!("DECIMAL({})", p)
114 } else {
115 "DECIMAL".to_string()
116 };
117 return Ok(Expression::DataType(DataType::Custom { name }));
118 }
119
120 // Handle AT TIME ZONE with TIMESTAMPTZ cast
121 // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
122 // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
123 // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
124 if let Expression::AtTimeZone(ref at_tz) = expr {
125 // Check if this contains a TIMESTAMPTZ cast
126 if let Expression::Cast(ref inner_cast) = at_tz.this {
127 if let DataType::Timestamp {
128 timezone: true,
129 precision,
130 } = &inner_cast.to
131 {
132 // Get precision, default 6, cap at 6
133 let capped_precision = FabricDialect::cap_precision(*precision, 6);
134
135 // Create inner DATETIMEOFFSET cast
136 let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
137 this: inner_cast.this.clone(),
138 to: DataType::Custom {
139 name: format!("DATETIMEOFFSET({})", capped_precision),
140 },
141 trailing_comments: inner_cast.trailing_comments.clone(),
142 double_colon_syntax: false,
143 format: None,
144 default: None,
145 inferred_type: None,
146 }));
147
148 // Create new AT TIME ZONE with DATETIMEOFFSET
149 let new_at_tz =
150 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
151 this: datetimeoffset_cast,
152 zone: at_tz.zone.clone(),
153 }));
154
155 // Wrap in outer DATETIME2 cast
156 return Ok(Expression::Cast(Box::new(Cast {
157 this: new_at_tz,
158 to: DataType::Custom {
159 name: format!("DATETIME2({})", capped_precision),
160 },
161 trailing_comments: Vec::new(),
162 double_colon_syntax: false,
163 format: None,
164 default: None,
165 inferred_type: None,
166 })));
167 }
168 }
169 }
170
171 // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
172 // Reference: Python sqlglot Fabric dialect unixtotime_sql
173 if let Expression::UnixToTime(ref f) = expr {
174 // Build: column * 1e6
175 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
176 left: (*f.this).clone(),
177 right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
178 left_comments: Vec::new(),
179 operator_comments: Vec::new(),
180 trailing_comments: Vec::new(),
181 inferred_type: None,
182 }));
183
184 // Build: ROUND(column * 1e6, 0)
185 let round_expr = Expression::Function(Box::new(Function::new(
186 "ROUND".to_string(),
187 vec![
188 column_times_1e6,
189 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
190 ],
191 )));
192
193 // Build: CAST(ROUND(...) AS BIGINT)
194 let cast_to_bigint = Expression::Cast(Box::new(Cast {
195 this: round_expr,
196 to: DataType::BigInt { length: None },
197 trailing_comments: Vec::new(),
198 double_colon_syntax: false,
199 format: None,
200 default: None,
201 inferred_type: None,
202 }));
203
204 // Build: CAST('1970-01-01' AS DATETIME2(6))
205 let epoch_start = Expression::Cast(Box::new(Cast {
206 this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
207 to: DataType::Custom {
208 name: "DATETIME2(6)".to_string(),
209 },
210 trailing_comments: Vec::new(),
211 double_colon_syntax: false,
212 format: None,
213 default: None,
214 inferred_type: None,
215 }));
216
217 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
218 let dateadd = Expression::Function(Box::new(Function::new(
219 "DATEADD".to_string(),
220 vec![
221 Expression::Identifier(Identifier::new("MICROSECONDS")),
222 cast_to_bigint,
223 epoch_start,
224 ],
225 )));
226
227 return Ok(dateadd);
228 }
229
230 // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
231 // Reference: Python sqlglot Fabric dialect unixtotime_sql
232 if let Expression::Function(ref f) = expr {
233 if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
234 let timestamp_input = f.args[0].clone();
235
236 // Build: column * 1e6
237 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
238 left: timestamp_input,
239 right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
240 left_comments: Vec::new(),
241 operator_comments: Vec::new(),
242 trailing_comments: Vec::new(),
243 inferred_type: None,
244 }));
245
246 // Build: ROUND(column * 1e6, 0)
247 let round_expr = Expression::Function(Box::new(Function::new(
248 "ROUND".to_string(),
249 vec![
250 column_times_1e6,
251 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
252 ],
253 )));
254
255 // Build: CAST(ROUND(...) AS BIGINT)
256 let cast_to_bigint = Expression::Cast(Box::new(Cast {
257 this: round_expr,
258 to: DataType::BigInt { length: None },
259 trailing_comments: Vec::new(),
260 double_colon_syntax: false,
261 format: None,
262 default: None,
263 inferred_type: None,
264 }));
265
266 // Build: CAST('1970-01-01' AS DATETIME2(6))
267 let epoch_start = Expression::Cast(Box::new(Cast {
268 this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
269 to: DataType::Custom {
270 name: "DATETIME2(6)".to_string(),
271 },
272 trailing_comments: Vec::new(),
273 double_colon_syntax: false,
274 format: None,
275 default: None,
276 inferred_type: None,
277 }));
278
279 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
280 let dateadd = Expression::Function(Box::new(Function::new(
281 "DATEADD".to_string(),
282 vec![
283 Expression::Identifier(Identifier::new("MICROSECONDS")),
284 cast_to_bigint,
285 epoch_start,
286 ],
287 )));
288
289 return Ok(dateadd);
290 }
291 }
292
293 // Delegate to T-SQL for other transformations
294 let tsql = TSQLDialect;
295 let transformed = tsql.transform_expr(expr)?;
296
297 // Apply Fabric-specific transformations to the result
298 self.transform_fabric_expr(transformed)
299 }
300}
301
302#[cfg(feature = "transpile")]
303impl FabricDialect {
304 /// Fabric-specific expression transformations
305 fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
306 match expr {
307 // Handle DataType expressions with Fabric-specific type mappings
308 Expression::DataType(dt) => self.transform_fabric_data_type(dt),
309
310 // Pass through everything else
311 _ => Ok(expr),
312 }
313 }
314
315 /// Transform data types according to Fabric TYPE_MAPPING
316 /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
317 fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
318 let transformed = match dt {
319 // TIMESTAMP -> DATETIME2(6) with precision handling
320 // Note: TSQL already converts this to DATETIME2, but without precision
321 DataType::Timestamp { precision, .. } => {
322 let p = Self::cap_precision(precision, 6);
323 DataType::Custom {
324 name: format!("DATETIME2({})", p),
325 }
326 }
327
328 // TIME -> TIME(6) default, capped at 6
329 DataType::Time { precision, .. } => {
330 let p = Self::cap_precision(precision, 6);
331 DataType::Custom {
332 name: format!("TIME({})", p),
333 }
334 }
335
336 // INT -> INT (override TSQL which may output INTEGER)
337 DataType::Int { .. } => DataType::Custom {
338 name: "INT".to_string(),
339 },
340
341 // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
342 DataType::Decimal { precision, scale } => {
343 if let (Some(p), Some(s)) = (&precision, &scale) {
344 DataType::Custom {
345 name: format!("DECIMAL({}, {})", p, s),
346 }
347 } else if let Some(p) = &precision {
348 DataType::Custom {
349 name: format!("DECIMAL({})", p),
350 }
351 } else {
352 DataType::Custom {
353 name: "DECIMAL".to_string(),
354 }
355 }
356 }
357
358 // JSON -> VARCHAR
359 DataType::Json => DataType::Custom {
360 name: "VARCHAR".to_string(),
361 },
362
363 // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
364 DataType::Uuid => DataType::Custom {
365 name: "UNIQUEIDENTIFIER".to_string(),
366 },
367
368 // TinyInt -> SMALLINT
369 DataType::TinyInt { .. } => DataType::Custom {
370 name: "SMALLINT".to_string(),
371 },
372
373 // Handle Custom types for Fabric-specific mappings
374 DataType::Custom { ref name } => {
375 let upper = name.to_uppercase();
376
377 // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
378 let (base_name, precision, scale) =
379 TSQLDialect::parse_type_precision_and_scale(&upper);
380 let has_max_length = upper.contains("(MAX)");
381
382 match base_name.as_str() {
383 // DATETIME -> DATETIME2(6)
384 "DATETIME" => DataType::Custom {
385 name: "DATETIME2(6)".to_string(),
386 },
387
388 // SMALLDATETIME -> DATETIME2(6)
389 "SMALLDATETIME" => DataType::Custom {
390 name: "DATETIME2(6)".to_string(),
391 },
392
393 // DATETIME2 -> DATETIME2(6) default, cap at 6
394 "DATETIME2" => {
395 let p = Self::cap_precision(precision, 6);
396 DataType::Custom {
397 name: format!("DATETIME2({})", p),
398 }
399 }
400
401 // DATETIMEOFFSET -> cap precision at 6
402 "DATETIMEOFFSET" => {
403 let p = Self::cap_precision(precision, 6);
404 DataType::Custom {
405 name: format!("DATETIMEOFFSET({})", p),
406 }
407 }
408
409 // TIME -> TIME(6) default, cap at 6
410 "TIME" => {
411 let p = Self::cap_precision(precision, 6);
412 DataType::Custom {
413 name: format!("TIME({})", p),
414 }
415 }
416
417 // TIMESTAMP -> DATETIME2(6)
418 "TIMESTAMP" => DataType::Custom {
419 name: "DATETIME2(6)".to_string(),
420 },
421
422 // TIMESTAMPNTZ -> DATETIME2(6) with precision
423 "TIMESTAMPNTZ" => {
424 let p = Self::cap_precision(precision, 6);
425 DataType::Custom {
426 name: format!("DATETIME2({})", p),
427 }
428 }
429
430 // TIMESTAMPTZ -> DATETIME2(6) with precision
431 "TIMESTAMPTZ" => {
432 let p = Self::cap_precision(precision, 6);
433 DataType::Custom {
434 name: format!("DATETIME2({})", p),
435 }
436 }
437
438 // IMAGE -> VARBINARY
439 "IMAGE" => DataType::Custom {
440 name: "VARBINARY".to_string(),
441 },
442
443 // MONEY -> DECIMAL
444 "MONEY" => DataType::Custom {
445 name: "DECIMAL".to_string(),
446 },
447
448 // SMALLMONEY -> DECIMAL
449 "SMALLMONEY" => DataType::Custom {
450 name: "DECIMAL".to_string(),
451 },
452
453 // NCHAR -> CHAR (with length preserved)
454 "NCHAR" => {
455 if has_max_length {
456 DataType::Custom {
457 name: "CHAR(MAX)".to_string(),
458 }
459 } else if let Some(len) = precision {
460 DataType::Custom {
461 name: format!("CHAR({})", len),
462 }
463 } else {
464 DataType::Custom {
465 name: "CHAR".to_string(),
466 }
467 }
468 }
469
470 // NVARCHAR -> VARCHAR (with length preserved)
471 "NVARCHAR" => {
472 if has_max_length {
473 DataType::Custom {
474 name: "VARCHAR(MAX)".to_string(),
475 }
476 } else if let Some(len) = precision {
477 DataType::Custom {
478 name: format!("VARCHAR({})", len),
479 }
480 } else {
481 DataType::Custom {
482 name: "VARCHAR".to_string(),
483 }
484 }
485 }
486
487 // TINYINT -> SMALLINT
488 "TINYINT" => DataType::Custom {
489 name: "SMALLINT".to_string(),
490 },
491
492 // UTINYINT -> SMALLINT
493 "UTINYINT" => DataType::Custom {
494 name: "SMALLINT".to_string(),
495 },
496
497 // VARIANT -> SQL_VARIANT
498 "VARIANT" => DataType::Custom {
499 name: "SQL_VARIANT".to_string(),
500 },
501
502 // XML -> VARCHAR
503 "XML" => DataType::Custom {
504 name: "VARCHAR".to_string(),
505 },
506
507 // NUMERIC -> DECIMAL (override TSQL's conversion)
508 // Fabric uses DECIMAL, not NUMERIC
509 "NUMERIC" => {
510 if let (Some(p), Some(s)) = (precision, scale) {
511 DataType::Custom {
512 name: format!("DECIMAL({}, {})", p, s),
513 }
514 } else if let Some(p) = precision {
515 DataType::Custom {
516 name: format!("DECIMAL({})", p),
517 }
518 } else {
519 DataType::Custom {
520 name: "DECIMAL".to_string(),
521 }
522 }
523 }
524
525 // Pass through other custom types unchanged
526 _ => dt,
527 }
528 }
529
530 // Keep all other types as transformed by TSQL
531 other => other,
532 };
533
534 Ok(Expression::DataType(transformed))
535 }
536
537 /// Cap precision to max value, defaulting to max if not specified
538 fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
539 match precision {
540 Some(p) if p > max => max,
541 Some(p) => p,
542 None => max, // Default to max if not specified
543 }
544 }
545}