polyglot_sql/dialects/fabric.rs
1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
23/// Microsoft Fabric Data Warehouse dialect (based on T-SQL)
24pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
27 fn dialect_type(&self) -> DialectType {
28 DialectType::Fabric
29 }
30
31 fn tokenizer_config(&self) -> TokenizerConfig {
32 // Inherit from T-SQL
33 let tsql = TSQLDialect;
34 tsql.tokenizer_config()
35 }
36
37 fn generator_config(&self) -> GeneratorConfig {
38 use crate::generator::IdentifierQuoteStyle;
39 // Inherit from T-SQL with Fabric dialect type
40 GeneratorConfig {
41 // Use square brackets like T-SQL
42 identifier_quote: '[',
43 identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44 dialect: Some(DialectType::Fabric),
45 ..Default::default()
46 }
47 }
48
49 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50 // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
51 // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
52 if let Expression::CreateTable(mut ct) = expr {
53 for col in &mut ct.columns {
54 match &col.data_type {
55 DataType::VarChar { length: None, .. } => {
56 col.data_type = DataType::VarChar {
57 length: Some(1),
58 parenthesized_length: false,
59 };
60 }
61 DataType::Char { length: None } => {
62 col.data_type = DataType::Char { length: Some(1) };
63 }
64 _ => {}
65 }
66 // Also transform column data types through Fabric's type mappings
67 if let Expression::DataType(new_dt) =
68 self.transform_fabric_data_type(col.data_type.clone())?
69 {
70 col.data_type = new_dt;
71 }
72 }
73 return Ok(Expression::CreateTable(ct));
74 }
75
76 // Handle DataType::Timestamp specially BEFORE T-SQL transform
77 // because TSQL loses precision info when converting Timestamp to DATETIME2
78 if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
79 let p = FabricDialect::cap_precision(*precision, 6);
80 return Ok(Expression::DataType(DataType::Custom {
81 name: format!("DATETIME2({})", p),
82 }));
83 }
84
85 // Handle DataType::Time specially BEFORE T-SQL transform
86 // to ensure we get default precision of 6
87 if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
88 let p = FabricDialect::cap_precision(*precision, 6);
89 return Ok(Expression::DataType(DataType::Custom {
90 name: format!("TIME({})", p),
91 }));
92 }
93
94 // Handle DataType::Decimal specially BEFORE T-SQL transform
95 // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
96 if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
97 let name = if let (Some(p), Some(s)) = (precision, scale) {
98 format!("DECIMAL({}, {})", p, s)
99 } else if let Some(p) = precision {
100 format!("DECIMAL({})", p)
101 } else {
102 "DECIMAL".to_string()
103 };
104 return Ok(Expression::DataType(DataType::Custom { name }));
105 }
106
107 // Handle AT TIME ZONE with TIMESTAMPTZ cast
108 // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
109 // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
110 // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
111 if let Expression::AtTimeZone(ref at_tz) = expr {
112 // Check if this contains a TIMESTAMPTZ cast
113 if let Expression::Cast(ref inner_cast) = at_tz.this {
114 if let DataType::Timestamp {
115 timezone: true,
116 precision,
117 } = &inner_cast.to
118 {
119 // Get precision, default 6, cap at 6
120 let capped_precision = FabricDialect::cap_precision(*precision, 6);
121
122 // Create inner DATETIMEOFFSET cast
123 let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
124 this: inner_cast.this.clone(),
125 to: DataType::Custom {
126 name: format!("DATETIMEOFFSET({})", capped_precision),
127 },
128 trailing_comments: inner_cast.trailing_comments.clone(),
129 double_colon_syntax: false,
130 format: None,
131 default: None,
132 inferred_type: None,
133 }));
134
135 // Create new AT TIME ZONE with DATETIMEOFFSET
136 let new_at_tz =
137 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
138 this: datetimeoffset_cast,
139 zone: at_tz.zone.clone(),
140 }));
141
142 // Wrap in outer DATETIME2 cast
143 return Ok(Expression::Cast(Box::new(Cast {
144 this: new_at_tz,
145 to: DataType::Custom {
146 name: format!("DATETIME2({})", capped_precision),
147 },
148 trailing_comments: Vec::new(),
149 double_colon_syntax: false,
150 format: None,
151 default: None,
152 inferred_type: None,
153 })));
154 }
155 }
156 }
157
158 // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
159 // Reference: Python sqlglot Fabric dialect unixtotime_sql
160 if let Expression::UnixToTime(ref f) = expr {
161 // Build: column * 1e6
162 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
163 left: (*f.this).clone(),
164 right: Expression::Literal(Literal::Number("1e6".to_string())),
165 left_comments: Vec::new(),
166 operator_comments: Vec::new(),
167 trailing_comments: Vec::new(),
168 inferred_type: None,
169 }));
170
171 // Build: ROUND(column * 1e6, 0)
172 let round_expr = Expression::Function(Box::new(Function::new(
173 "ROUND".to_string(),
174 vec![
175 column_times_1e6,
176 Expression::Literal(Literal::Number("0".to_string())),
177 ],
178 )));
179
180 // Build: CAST(ROUND(...) AS BIGINT)
181 let cast_to_bigint = Expression::Cast(Box::new(Cast {
182 this: round_expr,
183 to: DataType::BigInt { length: None },
184 trailing_comments: Vec::new(),
185 double_colon_syntax: false,
186 format: None,
187 default: None,
188 inferred_type: None,
189 }));
190
191 // Build: CAST('1970-01-01' AS DATETIME2(6))
192 let epoch_start = Expression::Cast(Box::new(Cast {
193 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
194 to: DataType::Custom {
195 name: "DATETIME2(6)".to_string(),
196 },
197 trailing_comments: Vec::new(),
198 double_colon_syntax: false,
199 format: None,
200 default: None,
201 inferred_type: None,
202 }));
203
204 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
205 let dateadd = Expression::Function(Box::new(Function::new(
206 "DATEADD".to_string(),
207 vec![
208 Expression::Identifier(Identifier::new("MICROSECONDS")),
209 cast_to_bigint,
210 epoch_start,
211 ],
212 )));
213
214 return Ok(dateadd);
215 }
216
217 // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
218 // Reference: Python sqlglot Fabric dialect unixtotime_sql
219 if let Expression::Function(ref f) = expr {
220 if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
221 let timestamp_input = f.args[0].clone();
222
223 // Build: column * 1e6
224 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
225 left: timestamp_input,
226 right: Expression::Literal(Literal::Number("1e6".to_string())),
227 left_comments: Vec::new(),
228 operator_comments: Vec::new(),
229 trailing_comments: Vec::new(),
230 inferred_type: None,
231 }));
232
233 // Build: ROUND(column * 1e6, 0)
234 let round_expr = Expression::Function(Box::new(Function::new(
235 "ROUND".to_string(),
236 vec![
237 column_times_1e6,
238 Expression::Literal(Literal::Number("0".to_string())),
239 ],
240 )));
241
242 // Build: CAST(ROUND(...) AS BIGINT)
243 let cast_to_bigint = Expression::Cast(Box::new(Cast {
244 this: round_expr,
245 to: DataType::BigInt { length: None },
246 trailing_comments: Vec::new(),
247 double_colon_syntax: false,
248 format: None,
249 default: None,
250 inferred_type: None,
251 }));
252
253 // Build: CAST('1970-01-01' AS DATETIME2(6))
254 let epoch_start = Expression::Cast(Box::new(Cast {
255 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
256 to: DataType::Custom {
257 name: "DATETIME2(6)".to_string(),
258 },
259 trailing_comments: Vec::new(),
260 double_colon_syntax: false,
261 format: None,
262 default: None,
263 inferred_type: None,
264 }));
265
266 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
267 let dateadd = Expression::Function(Box::new(Function::new(
268 "DATEADD".to_string(),
269 vec![
270 Expression::Identifier(Identifier::new("MICROSECONDS")),
271 cast_to_bigint,
272 epoch_start,
273 ],
274 )));
275
276 return Ok(dateadd);
277 }
278 }
279
280 // Delegate to T-SQL for other transformations
281 let tsql = TSQLDialect;
282 let transformed = tsql.transform_expr(expr)?;
283
284 // Apply Fabric-specific transformations to the result
285 self.transform_fabric_expr(transformed)
286 }
287}
288
289impl FabricDialect {
290 /// Fabric-specific expression transformations
291 fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
292 match expr {
293 // Handle DataType expressions with Fabric-specific type mappings
294 Expression::DataType(dt) => self.transform_fabric_data_type(dt),
295
296 // Pass through everything else
297 _ => Ok(expr),
298 }
299 }
300
301 /// Transform data types according to Fabric TYPE_MAPPING
302 /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
303 fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
304 let transformed = match dt {
305 // TIMESTAMP -> DATETIME2(6) with precision handling
306 // Note: TSQL already converts this to DATETIME2, but without precision
307 DataType::Timestamp { precision, .. } => {
308 let p = Self::cap_precision(precision, 6);
309 DataType::Custom {
310 name: format!("DATETIME2({})", p),
311 }
312 }
313
314 // TIME -> TIME(6) default, capped at 6
315 DataType::Time { precision, .. } => {
316 let p = Self::cap_precision(precision, 6);
317 DataType::Custom {
318 name: format!("TIME({})", p),
319 }
320 }
321
322 // INT -> INT (override TSQL which may output INTEGER)
323 DataType::Int { .. } => DataType::Custom {
324 name: "INT".to_string(),
325 },
326
327 // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
328 DataType::Decimal { precision, scale } => {
329 if let (Some(p), Some(s)) = (&precision, &scale) {
330 DataType::Custom {
331 name: format!("DECIMAL({}, {})", p, s),
332 }
333 } else if let Some(p) = &precision {
334 DataType::Custom {
335 name: format!("DECIMAL({})", p),
336 }
337 } else {
338 DataType::Custom {
339 name: "DECIMAL".to_string(),
340 }
341 }
342 }
343
344 // JSON -> VARCHAR
345 DataType::Json => DataType::Custom {
346 name: "VARCHAR".to_string(),
347 },
348
349 // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
350 DataType::Uuid => DataType::Custom {
351 name: "UNIQUEIDENTIFIER".to_string(),
352 },
353
354 // TinyInt -> SMALLINT
355 DataType::TinyInt { .. } => DataType::Custom {
356 name: "SMALLINT".to_string(),
357 },
358
359 // Handle Custom types for Fabric-specific mappings
360 DataType::Custom { ref name } => {
361 let upper = name.to_uppercase();
362
363 // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
364 let (base_name, precision, scale) = Self::parse_type_precision_and_scale(&upper);
365
366 match base_name.as_str() {
367 // DATETIME -> DATETIME2(6)
368 "DATETIME" => DataType::Custom {
369 name: "DATETIME2(6)".to_string(),
370 },
371
372 // SMALLDATETIME -> DATETIME2(6)
373 "SMALLDATETIME" => DataType::Custom {
374 name: "DATETIME2(6)".to_string(),
375 },
376
377 // DATETIME2 -> DATETIME2(6) default, cap at 6
378 "DATETIME2" => {
379 let p = Self::cap_precision(precision, 6);
380 DataType::Custom {
381 name: format!("DATETIME2({})", p),
382 }
383 }
384
385 // DATETIMEOFFSET -> cap precision at 6
386 "DATETIMEOFFSET" => {
387 let p = Self::cap_precision(precision, 6);
388 DataType::Custom {
389 name: format!("DATETIMEOFFSET({})", p),
390 }
391 }
392
393 // TIME -> TIME(6) default, cap at 6
394 "TIME" => {
395 let p = Self::cap_precision(precision, 6);
396 DataType::Custom {
397 name: format!("TIME({})", p),
398 }
399 }
400
401 // TIMESTAMP -> DATETIME2(6)
402 "TIMESTAMP" => DataType::Custom {
403 name: "DATETIME2(6)".to_string(),
404 },
405
406 // TIMESTAMPNTZ -> DATETIME2(6) with precision
407 "TIMESTAMPNTZ" => {
408 let p = Self::cap_precision(precision, 6);
409 DataType::Custom {
410 name: format!("DATETIME2({})", p),
411 }
412 }
413
414 // TIMESTAMPTZ -> DATETIME2(6) with precision
415 "TIMESTAMPTZ" => {
416 let p = Self::cap_precision(precision, 6);
417 DataType::Custom {
418 name: format!("DATETIME2({})", p),
419 }
420 }
421
422 // IMAGE -> VARBINARY
423 "IMAGE" => DataType::Custom {
424 name: "VARBINARY".to_string(),
425 },
426
427 // MONEY -> DECIMAL
428 "MONEY" => DataType::Custom {
429 name: "DECIMAL".to_string(),
430 },
431
432 // SMALLMONEY -> DECIMAL
433 "SMALLMONEY" => DataType::Custom {
434 name: "DECIMAL".to_string(),
435 },
436
437 // NCHAR -> CHAR (with length preserved)
438 "NCHAR" => {
439 if let Some(len) = precision {
440 DataType::Custom {
441 name: format!("CHAR({})", len),
442 }
443 } else {
444 DataType::Custom {
445 name: "CHAR".to_string(),
446 }
447 }
448 }
449
450 // NVARCHAR -> VARCHAR (with length preserved)
451 "NVARCHAR" => {
452 if let Some(len) = precision {
453 DataType::Custom {
454 name: format!("VARCHAR({})", len),
455 }
456 } else {
457 DataType::Custom {
458 name: "VARCHAR".to_string(),
459 }
460 }
461 }
462
463 // TINYINT -> SMALLINT
464 "TINYINT" => DataType::Custom {
465 name: "SMALLINT".to_string(),
466 },
467
468 // UTINYINT -> SMALLINT
469 "UTINYINT" => DataType::Custom {
470 name: "SMALLINT".to_string(),
471 },
472
473 // VARIANT -> SQL_VARIANT
474 "VARIANT" => DataType::Custom {
475 name: "SQL_VARIANT".to_string(),
476 },
477
478 // XML -> VARCHAR
479 "XML" => DataType::Custom {
480 name: "VARCHAR".to_string(),
481 },
482
483 // NUMERIC -> DECIMAL (override TSQL's conversion)
484 // Fabric uses DECIMAL, not NUMERIC
485 "NUMERIC" => {
486 if let (Some(p), Some(s)) = (precision, scale) {
487 DataType::Custom {
488 name: format!("DECIMAL({}, {})", p, s),
489 }
490 } else if let Some(p) = precision {
491 DataType::Custom {
492 name: format!("DECIMAL({})", p),
493 }
494 } else {
495 DataType::Custom {
496 name: "DECIMAL".to_string(),
497 }
498 }
499 }
500
501 // Pass through other custom types unchanged
502 _ => dt,
503 }
504 }
505
506 // Keep all other types as transformed by TSQL
507 other => other,
508 };
509
510 Ok(Expression::DataType(transformed))
511 }
512
513 /// Cap precision to max value, defaulting to max if not specified
514 fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
515 match precision {
516 Some(p) if p > max => max,
517 Some(p) => p,
518 None => max, // Default to max if not specified
519 }
520 }
521
522 /// Parse type name and optional precision/scale from strings like "DATETIME2(7)" or "NUMERIC(10, 2)"
523 fn parse_type_precision_and_scale(name: &str) -> (String, Option<u32>, Option<u32>) {
524 if let Some(paren_pos) = name.find('(') {
525 let base = name[..paren_pos].to_string();
526 let rest = &name[paren_pos + 1..];
527 if let Some(close_pos) = rest.find(')') {
528 let args = &rest[..close_pos];
529 let parts: Vec<&str> = args.split(',').map(|s| s.trim()).collect();
530
531 let precision = parts.first().and_then(|s| s.parse::<u32>().ok());
532 let scale = parts.get(1).and_then(|s| s.parse::<u32>().ok());
533
534 return (base, precision, scale);
535 }
536 (base, None, None)
537 } else {
538 (name.to_string(), None, None)
539 }
540 }
541}