polyglot_sql/dialects/fabric.rs
1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// Microsoft Fabric Data Warehouse dialect (based on T-SQL).
///
/// A stateless unit struct: all behavior lives in its `DialectImpl` implementation,
/// which reuses the T-SQL tokenizer configuration and T-SQL expression transforms and
/// layers Fabric-specific data-type mappings on top of them.
pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
27 fn dialect_type(&self) -> DialectType {
28 DialectType::Fabric
29 }
30
31 fn tokenizer_config(&self) -> TokenizerConfig {
32 // Inherit from T-SQL
33 let tsql = TSQLDialect;
34 tsql.tokenizer_config()
35 }
36
37 fn generator_config(&self) -> GeneratorConfig {
38 use crate::generator::IdentifierQuoteStyle;
39 // Inherit from T-SQL with Fabric dialect type
40 GeneratorConfig {
41 // Use square brackets like T-SQL
42 identifier_quote: '[',
43 identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44 dialect: Some(DialectType::Fabric),
45 ..Default::default()
46 }
47 }
48
49 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50 // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
51 // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
52 if let Expression::CreateTable(mut ct) = expr {
53 for col in &mut ct.columns {
54 match &col.data_type {
55 DataType::VarChar { length: None, .. } => {
56 col.data_type = DataType::VarChar {
57 length: Some(1),
58 parenthesized_length: false,
59 };
60 }
61 DataType::Char { length: None } => {
62 col.data_type = DataType::Char { length: Some(1) };
63 }
64 _ => {}
65 }
66 // Also transform column data types through Fabric's type mappings
67 if let Expression::DataType(new_dt) =
68 self.transform_fabric_data_type(col.data_type.clone())?
69 {
70 col.data_type = new_dt;
71 }
72 }
73 return Ok(Expression::CreateTable(ct));
74 }
75
76 // Handle DataType::Timestamp specially BEFORE T-SQL transform
77 // because TSQL loses precision info when converting Timestamp to DATETIME2
78 if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
79 let p = FabricDialect::cap_precision(*precision, 6);
80 return Ok(Expression::DataType(DataType::Custom {
81 name: format!("DATETIME2({})", p),
82 }));
83 }
84
85 // Handle DataType::Time specially BEFORE T-SQL transform
86 // to ensure we get default precision of 6
87 if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
88 let p = FabricDialect::cap_precision(*precision, 6);
89 return Ok(Expression::DataType(DataType::Custom {
90 name: format!("TIME({})", p),
91 }));
92 }
93
94 // Handle DataType::Decimal specially BEFORE T-SQL transform
95 // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
96 if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
97 let name = if let (Some(p), Some(s)) = (precision, scale) {
98 format!("DECIMAL({}, {})", p, s)
99 } else if let Some(p) = precision {
100 format!("DECIMAL({})", p)
101 } else {
102 "DECIMAL".to_string()
103 };
104 return Ok(Expression::DataType(DataType::Custom { name }));
105 }
106
107 // Handle AT TIME ZONE with TIMESTAMPTZ cast
108 // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
109 // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
110 // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
111 if let Expression::AtTimeZone(ref at_tz) = expr {
112 // Check if this contains a TIMESTAMPTZ cast
113 if let Expression::Cast(ref inner_cast) = at_tz.this {
114 if let DataType::Timestamp {
115 timezone: true,
116 precision,
117 } = &inner_cast.to
118 {
119 // Get precision, default 6, cap at 6
120 let capped_precision = FabricDialect::cap_precision(*precision, 6);
121
122 // Create inner DATETIMEOFFSET cast
123 let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
124 this: inner_cast.this.clone(),
125 to: DataType::Custom {
126 name: format!("DATETIMEOFFSET({})", capped_precision),
127 },
128 trailing_comments: inner_cast.trailing_comments.clone(),
129 double_colon_syntax: false,
130 format: None,
131 default: None,
132 }));
133
134 // Create new AT TIME ZONE with DATETIMEOFFSET
135 let new_at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
136 this: datetimeoffset_cast,
137 zone: at_tz.zone.clone(),
138 }));
139
140 // Wrap in outer DATETIME2 cast
141 return Ok(Expression::Cast(Box::new(Cast {
142 this: new_at_tz,
143 to: DataType::Custom {
144 name: format!("DATETIME2({})", capped_precision),
145 },
146 trailing_comments: Vec::new(),
147 double_colon_syntax: false,
148 format: None,
149 default: None,
150 })));
151 }
152 }
153 }
154
155 // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
156 // Reference: Python sqlglot Fabric dialect unixtotime_sql
157 if let Expression::UnixToTime(ref f) = expr {
158 // Build: column * 1e6
159 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
160 left: (*f.this).clone(),
161 right: Expression::Literal(Literal::Number("1e6".to_string())),
162 left_comments: Vec::new(),
163 operator_comments: Vec::new(),
164 trailing_comments: Vec::new(),
165 }));
166
167 // Build: ROUND(column * 1e6, 0)
168 let round_expr = Expression::Function(Box::new(Function::new(
169 "ROUND".to_string(),
170 vec![
171 column_times_1e6,
172 Expression::Literal(Literal::Number("0".to_string())),
173 ],
174 )));
175
176 // Build: CAST(ROUND(...) AS BIGINT)
177 let cast_to_bigint = Expression::Cast(Box::new(Cast {
178 this: round_expr,
179 to: DataType::BigInt { length: None },
180 trailing_comments: Vec::new(),
181 double_colon_syntax: false,
182 format: None,
183 default: None,
184 }));
185
186 // Build: CAST('1970-01-01' AS DATETIME2(6))
187 let epoch_start = Expression::Cast(Box::new(Cast {
188 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
189 to: DataType::Custom {
190 name: "DATETIME2(6)".to_string(),
191 },
192 trailing_comments: Vec::new(),
193 double_colon_syntax: false,
194 format: None,
195 default: None,
196 }));
197
198 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
199 let dateadd = Expression::Function(Box::new(Function::new(
200 "DATEADD".to_string(),
201 vec![
202 Expression::Identifier(Identifier::new("MICROSECONDS")),
203 cast_to_bigint,
204 epoch_start,
205 ],
206 )));
207
208 return Ok(dateadd);
209 }
210
211 // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
212 // Reference: Python sqlglot Fabric dialect unixtotime_sql
213 if let Expression::Function(ref f) = expr {
214 if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
215 let timestamp_input = f.args[0].clone();
216
217 // Build: column * 1e6
218 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
219 left: timestamp_input,
220 right: Expression::Literal(Literal::Number("1e6".to_string())),
221 left_comments: Vec::new(),
222 operator_comments: Vec::new(),
223 trailing_comments: Vec::new(),
224 }));
225
226 // Build: ROUND(column * 1e6, 0)
227 let round_expr = Expression::Function(Box::new(Function::new(
228 "ROUND".to_string(),
229 vec![
230 column_times_1e6,
231 Expression::Literal(Literal::Number("0".to_string())),
232 ],
233 )));
234
235 // Build: CAST(ROUND(...) AS BIGINT)
236 let cast_to_bigint = Expression::Cast(Box::new(Cast {
237 this: round_expr,
238 to: DataType::BigInt { length: None },
239 trailing_comments: Vec::new(),
240 double_colon_syntax: false,
241 format: None,
242 default: None,
243 }));
244
245 // Build: CAST('1970-01-01' AS DATETIME2(6))
246 let epoch_start = Expression::Cast(Box::new(Cast {
247 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
248 to: DataType::Custom {
249 name: "DATETIME2(6)".to_string(),
250 },
251 trailing_comments: Vec::new(),
252 double_colon_syntax: false,
253 format: None,
254 default: None,
255 }));
256
257 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
258 let dateadd = Expression::Function(Box::new(Function::new(
259 "DATEADD".to_string(),
260 vec![
261 Expression::Identifier(Identifier::new("MICROSECONDS")),
262 cast_to_bigint,
263 epoch_start,
264 ],
265 )));
266
267 return Ok(dateadd);
268 }
269 }
270
271 // Delegate to T-SQL for other transformations
272 let tsql = TSQLDialect;
273 let transformed = tsql.transform_expr(expr)?;
274
275 // Apply Fabric-specific transformations to the result
276 self.transform_fabric_expr(transformed)
277 }
278}
279
280impl FabricDialect {
281 /// Fabric-specific expression transformations
282 fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
283 match expr {
284 // Handle DataType expressions with Fabric-specific type mappings
285 Expression::DataType(dt) => self.transform_fabric_data_type(dt),
286
287 // Pass through everything else
288 _ => Ok(expr),
289 }
290 }
291
292 /// Transform data types according to Fabric TYPE_MAPPING
293 /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
294 fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
295 let transformed = match dt {
296 // TIMESTAMP -> DATETIME2(6) with precision handling
297 // Note: TSQL already converts this to DATETIME2, but without precision
298 DataType::Timestamp { precision, .. } => {
299 let p = Self::cap_precision(precision, 6);
300 DataType::Custom {
301 name: format!("DATETIME2({})", p),
302 }
303 }
304
305 // TIME -> TIME(6) default, capped at 6
306 DataType::Time { precision, .. } => {
307 let p = Self::cap_precision(precision, 6);
308 DataType::Custom {
309 name: format!("TIME({})", p),
310 }
311 }
312
313 // INT -> INT (override TSQL which may output INTEGER)
314 DataType::Int { .. } => DataType::Custom {
315 name: "INT".to_string(),
316 },
317
318 // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
319 DataType::Decimal { precision, scale } => {
320 if let (Some(p), Some(s)) = (&precision, &scale) {
321 DataType::Custom {
322 name: format!("DECIMAL({}, {})", p, s),
323 }
324 } else if let Some(p) = &precision {
325 DataType::Custom {
326 name: format!("DECIMAL({})", p),
327 }
328 } else {
329 DataType::Custom {
330 name: "DECIMAL".to_string(),
331 }
332 }
333 }
334
335 // JSON -> VARCHAR
336 DataType::Json => DataType::Custom {
337 name: "VARCHAR".to_string(),
338 },
339
340 // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
341 DataType::Uuid => DataType::Custom {
342 name: "UNIQUEIDENTIFIER".to_string(),
343 },
344
345 // TinyInt -> SMALLINT
346 DataType::TinyInt { .. } => DataType::Custom {
347 name: "SMALLINT".to_string(),
348 },
349
350 // Handle Custom types for Fabric-specific mappings
351 DataType::Custom { ref name } => {
352 let upper = name.to_uppercase();
353
354 // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
355 let (base_name, precision, scale) = Self::parse_type_precision_and_scale(&upper);
356
357 match base_name.as_str() {
358 // DATETIME -> DATETIME2(6)
359 "DATETIME" => DataType::Custom {
360 name: "DATETIME2(6)".to_string(),
361 },
362
363 // SMALLDATETIME -> DATETIME2(6)
364 "SMALLDATETIME" => DataType::Custom {
365 name: "DATETIME2(6)".to_string(),
366 },
367
368 // DATETIME2 -> DATETIME2(6) default, cap at 6
369 "DATETIME2" => {
370 let p = Self::cap_precision(precision, 6);
371 DataType::Custom {
372 name: format!("DATETIME2({})", p),
373 }
374 }
375
376 // DATETIMEOFFSET -> cap precision at 6
377 "DATETIMEOFFSET" => {
378 let p = Self::cap_precision(precision, 6);
379 DataType::Custom {
380 name: format!("DATETIMEOFFSET({})", p),
381 }
382 }
383
384 // TIME -> TIME(6) default, cap at 6
385 "TIME" => {
386 let p = Self::cap_precision(precision, 6);
387 DataType::Custom {
388 name: format!("TIME({})", p),
389 }
390 }
391
392 // TIMESTAMP -> DATETIME2(6)
393 "TIMESTAMP" => DataType::Custom {
394 name: "DATETIME2(6)".to_string(),
395 },
396
397 // TIMESTAMPNTZ -> DATETIME2(6) with precision
398 "TIMESTAMPNTZ" => {
399 let p = Self::cap_precision(precision, 6);
400 DataType::Custom {
401 name: format!("DATETIME2({})", p),
402 }
403 }
404
405 // TIMESTAMPTZ -> DATETIME2(6) with precision
406 "TIMESTAMPTZ" => {
407 let p = Self::cap_precision(precision, 6);
408 DataType::Custom {
409 name: format!("DATETIME2({})", p),
410 }
411 }
412
413 // IMAGE -> VARBINARY
414 "IMAGE" => DataType::Custom {
415 name: "VARBINARY".to_string(),
416 },
417
418 // MONEY -> DECIMAL
419 "MONEY" => DataType::Custom {
420 name: "DECIMAL".to_string(),
421 },
422
423 // SMALLMONEY -> DECIMAL
424 "SMALLMONEY" => DataType::Custom {
425 name: "DECIMAL".to_string(),
426 },
427
428 // NCHAR -> CHAR (with length preserved)
429 "NCHAR" => {
430 if let Some(len) = precision {
431 DataType::Custom {
432 name: format!("CHAR({})", len),
433 }
434 } else {
435 DataType::Custom {
436 name: "CHAR".to_string(),
437 }
438 }
439 }
440
441 // NVARCHAR -> VARCHAR (with length preserved)
442 "NVARCHAR" => {
443 if let Some(len) = precision {
444 DataType::Custom {
445 name: format!("VARCHAR({})", len),
446 }
447 } else {
448 DataType::Custom {
449 name: "VARCHAR".to_string(),
450 }
451 }
452 }
453
454 // TINYINT -> SMALLINT
455 "TINYINT" => DataType::Custom {
456 name: "SMALLINT".to_string(),
457 },
458
459 // UTINYINT -> SMALLINT
460 "UTINYINT" => DataType::Custom {
461 name: "SMALLINT".to_string(),
462 },
463
464 // VARIANT -> SQL_VARIANT
465 "VARIANT" => DataType::Custom {
466 name: "SQL_VARIANT".to_string(),
467 },
468
469 // XML -> VARCHAR
470 "XML" => DataType::Custom {
471 name: "VARCHAR".to_string(),
472 },
473
474 // NUMERIC -> DECIMAL (override TSQL's conversion)
475 // Fabric uses DECIMAL, not NUMERIC
476 "NUMERIC" => {
477 if let (Some(p), Some(s)) = (precision, scale) {
478 DataType::Custom {
479 name: format!("DECIMAL({}, {})", p, s),
480 }
481 } else if let Some(p) = precision {
482 DataType::Custom {
483 name: format!("DECIMAL({})", p),
484 }
485 } else {
486 DataType::Custom {
487 name: "DECIMAL".to_string(),
488 }
489 }
490 }
491
492 // Pass through other custom types unchanged
493 _ => dt,
494 }
495 }
496
497 // Keep all other types as transformed by TSQL
498 other => other,
499 };
500
501 Ok(Expression::DataType(transformed))
502 }
503
/// Clamps an optional precision to at most `max`.
///
/// Returns `max` when `precision` is `None`, otherwise the smaller of the given
/// precision and `max`.
fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
    precision.map_or(max, |requested| requested.min(max))
}
512
/// Splits a type string such as `"DATETIME2(7)"` or `"NUMERIC(10, 2)"` into its base
/// name and optional numeric precision and scale.
///
/// Returns `(base, precision, scale)` where:
/// - `base` is the text before the first `(` (or the whole input when there is none),
///   with surrounding whitespace removed so that `"DATETIME2 (7)"` yields `"DATETIME2"`;
/// - `precision` is the first comma-separated argument, if it parses as a `u32`;
/// - `scale` is the second comma-separated argument, if it parses as a `u32`.
///
/// Non-numeric arguments (for example `MAX`) and a missing closing `)` both produce
/// `None` for the affected values. Arguments beyond the second are ignored.
fn parse_type_precision_and_scale(name: &str) -> (String, Option<u32>, Option<u32>) {
    let (base, rest) = match name.split_once('(') {
        Some(parts) => parts,
        None => return (name.trim().to_string(), None, None),
    };
    let base = base.trim().to_string();

    // Without a closing parenthesis the argument list is not trusted at all.
    let args = match rest.split_once(')') {
        Some((args, _)) => args,
        None => return (base, None, None),
    };

    let mut numbers = args.split(',').map(|part| part.trim().parse::<u32>().ok());
    let precision = numbers.next().flatten();
    let scale = numbers.next().flatten();

    (base, precision, scale)
}
532}