polyglot_sql/dialects/fabric.rs
1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
23/// Microsoft Fabric Data Warehouse dialect (based on T-SQL)
24pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
27 fn dialect_type(&self) -> DialectType {
28 DialectType::Fabric
29 }
30
31 fn tokenizer_config(&self) -> TokenizerConfig {
32 // Inherit from T-SQL
33 let tsql = TSQLDialect;
34 tsql.tokenizer_config()
35 }
36
37 fn generator_config(&self) -> GeneratorConfig {
38 use crate::generator::IdentifierQuoteStyle;
39 // Inherit from T-SQL with Fabric dialect type
40 GeneratorConfig {
41 // Use square brackets like T-SQL
42 identifier_quote: '[',
43 identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44 dialect: Some(DialectType::Fabric),
45 ..Default::default()
46 }
47 }
48
49 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50 // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
51 // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
52 if let Expression::CreateTable(mut ct) = expr {
53 for col in &mut ct.columns {
54 match &col.data_type {
55 DataType::VarChar { length: None, .. } => {
56 col.data_type = DataType::VarChar {
57 length: Some(1),
58 parenthesized_length: false,
59 };
60 }
61 DataType::Char { length: None } => {
62 col.data_type = DataType::Char { length: Some(1) };
63 }
64 _ => {}
65 }
66 // Also transform column data types through Fabric's type mappings
67 if let Expression::DataType(new_dt) =
68 self.transform_fabric_data_type(col.data_type.clone())?
69 {
70 col.data_type = new_dt;
71 }
72 }
73 return Ok(Expression::CreateTable(ct));
74 }
75
76 // Handle DataType::Timestamp specially BEFORE T-SQL transform
77 // because TSQL loses precision info when converting Timestamp to DATETIME2
78 if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
79 let p = FabricDialect::cap_precision(*precision, 6);
80 return Ok(Expression::DataType(DataType::Custom {
81 name: format!("DATETIME2({})", p),
82 }));
83 }
84
85 // Handle DataType::Time specially BEFORE T-SQL transform
86 // to ensure we get default precision of 6
87 if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
88 let p = FabricDialect::cap_precision(*precision, 6);
89 return Ok(Expression::DataType(DataType::Custom {
90 name: format!("TIME({})", p),
91 }));
92 }
93
94 // Handle DataType::Decimal specially BEFORE T-SQL transform
95 // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
96 if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
97 let name = if let (Some(p), Some(s)) = (precision, scale) {
98 format!("DECIMAL({}, {})", p, s)
99 } else if let Some(p) = precision {
100 format!("DECIMAL({})", p)
101 } else {
102 "DECIMAL".to_string()
103 };
104 return Ok(Expression::DataType(DataType::Custom { name }));
105 }
106
107 // Handle AT TIME ZONE with TIMESTAMPTZ cast
108 // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
109 // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
110 // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
111 if let Expression::AtTimeZone(ref at_tz) = expr {
112 // Check if this contains a TIMESTAMPTZ cast
113 if let Expression::Cast(ref inner_cast) = at_tz.this {
114 if let DataType::Timestamp {
115 timezone: true,
116 precision,
117 } = &inner_cast.to
118 {
119 // Get precision, default 6, cap at 6
120 let capped_precision = FabricDialect::cap_precision(*precision, 6);
121
122 // Create inner DATETIMEOFFSET cast
123 let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
124 this: inner_cast.this.clone(),
125 to: DataType::Custom {
126 name: format!("DATETIMEOFFSET({})", capped_precision),
127 },
128 trailing_comments: inner_cast.trailing_comments.clone(),
129 double_colon_syntax: false,
130 format: None,
131 default: None,
132 }));
133
134 // Create new AT TIME ZONE with DATETIMEOFFSET
135 let new_at_tz =
136 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
137 this: datetimeoffset_cast,
138 zone: at_tz.zone.clone(),
139 }));
140
141 // Wrap in outer DATETIME2 cast
142 return Ok(Expression::Cast(Box::new(Cast {
143 this: new_at_tz,
144 to: DataType::Custom {
145 name: format!("DATETIME2({})", capped_precision),
146 },
147 trailing_comments: Vec::new(),
148 double_colon_syntax: false,
149 format: None,
150 default: None,
151 })));
152 }
153 }
154 }
155
156 // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
157 // Reference: Python sqlglot Fabric dialect unixtotime_sql
158 if let Expression::UnixToTime(ref f) = expr {
159 // Build: column * 1e6
160 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
161 left: (*f.this).clone(),
162 right: Expression::Literal(Literal::Number("1e6".to_string())),
163 left_comments: Vec::new(),
164 operator_comments: Vec::new(),
165 trailing_comments: Vec::new(),
166 }));
167
168 // Build: ROUND(column * 1e6, 0)
169 let round_expr = Expression::Function(Box::new(Function::new(
170 "ROUND".to_string(),
171 vec![
172 column_times_1e6,
173 Expression::Literal(Literal::Number("0".to_string())),
174 ],
175 )));
176
177 // Build: CAST(ROUND(...) AS BIGINT)
178 let cast_to_bigint = Expression::Cast(Box::new(Cast {
179 this: round_expr,
180 to: DataType::BigInt { length: None },
181 trailing_comments: Vec::new(),
182 double_colon_syntax: false,
183 format: None,
184 default: None,
185 }));
186
187 // Build: CAST('1970-01-01' AS DATETIME2(6))
188 let epoch_start = Expression::Cast(Box::new(Cast {
189 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
190 to: DataType::Custom {
191 name: "DATETIME2(6)".to_string(),
192 },
193 trailing_comments: Vec::new(),
194 double_colon_syntax: false,
195 format: None,
196 default: None,
197 }));
198
199 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
200 let dateadd = Expression::Function(Box::new(Function::new(
201 "DATEADD".to_string(),
202 vec![
203 Expression::Identifier(Identifier::new("MICROSECONDS")),
204 cast_to_bigint,
205 epoch_start,
206 ],
207 )));
208
209 return Ok(dateadd);
210 }
211
212 // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
213 // Reference: Python sqlglot Fabric dialect unixtotime_sql
214 if let Expression::Function(ref f) = expr {
215 if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
216 let timestamp_input = f.args[0].clone();
217
218 // Build: column * 1e6
219 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
220 left: timestamp_input,
221 right: Expression::Literal(Literal::Number("1e6".to_string())),
222 left_comments: Vec::new(),
223 operator_comments: Vec::new(),
224 trailing_comments: Vec::new(),
225 }));
226
227 // Build: ROUND(column * 1e6, 0)
228 let round_expr = Expression::Function(Box::new(Function::new(
229 "ROUND".to_string(),
230 vec![
231 column_times_1e6,
232 Expression::Literal(Literal::Number("0".to_string())),
233 ],
234 )));
235
236 // Build: CAST(ROUND(...) AS BIGINT)
237 let cast_to_bigint = Expression::Cast(Box::new(Cast {
238 this: round_expr,
239 to: DataType::BigInt { length: None },
240 trailing_comments: Vec::new(),
241 double_colon_syntax: false,
242 format: None,
243 default: None,
244 }));
245
246 // Build: CAST('1970-01-01' AS DATETIME2(6))
247 let epoch_start = Expression::Cast(Box::new(Cast {
248 this: Expression::Literal(Literal::String("1970-01-01".to_string())),
249 to: DataType::Custom {
250 name: "DATETIME2(6)".to_string(),
251 },
252 trailing_comments: Vec::new(),
253 double_colon_syntax: false,
254 format: None,
255 default: None,
256 }));
257
258 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
259 let dateadd = Expression::Function(Box::new(Function::new(
260 "DATEADD".to_string(),
261 vec![
262 Expression::Identifier(Identifier::new("MICROSECONDS")),
263 cast_to_bigint,
264 epoch_start,
265 ],
266 )));
267
268 return Ok(dateadd);
269 }
270 }
271
272 // Delegate to T-SQL for other transformations
273 let tsql = TSQLDialect;
274 let transformed = tsql.transform_expr(expr)?;
275
276 // Apply Fabric-specific transformations to the result
277 self.transform_fabric_expr(transformed)
278 }
279}
280
281impl FabricDialect {
282 /// Fabric-specific expression transformations
283 fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
284 match expr {
285 // Handle DataType expressions with Fabric-specific type mappings
286 Expression::DataType(dt) => self.transform_fabric_data_type(dt),
287
288 // Pass through everything else
289 _ => Ok(expr),
290 }
291 }
292
293 /// Transform data types according to Fabric TYPE_MAPPING
294 /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
295 fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
296 let transformed = match dt {
297 // TIMESTAMP -> DATETIME2(6) with precision handling
298 // Note: TSQL already converts this to DATETIME2, but without precision
299 DataType::Timestamp { precision, .. } => {
300 let p = Self::cap_precision(precision, 6);
301 DataType::Custom {
302 name: format!("DATETIME2({})", p),
303 }
304 }
305
306 // TIME -> TIME(6) default, capped at 6
307 DataType::Time { precision, .. } => {
308 let p = Self::cap_precision(precision, 6);
309 DataType::Custom {
310 name: format!("TIME({})", p),
311 }
312 }
313
314 // INT -> INT (override TSQL which may output INTEGER)
315 DataType::Int { .. } => DataType::Custom {
316 name: "INT".to_string(),
317 },
318
319 // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
320 DataType::Decimal { precision, scale } => {
321 if let (Some(p), Some(s)) = (&precision, &scale) {
322 DataType::Custom {
323 name: format!("DECIMAL({}, {})", p, s),
324 }
325 } else if let Some(p) = &precision {
326 DataType::Custom {
327 name: format!("DECIMAL({})", p),
328 }
329 } else {
330 DataType::Custom {
331 name: "DECIMAL".to_string(),
332 }
333 }
334 }
335
336 // JSON -> VARCHAR
337 DataType::Json => DataType::Custom {
338 name: "VARCHAR".to_string(),
339 },
340
341 // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
342 DataType::Uuid => DataType::Custom {
343 name: "UNIQUEIDENTIFIER".to_string(),
344 },
345
346 // TinyInt -> SMALLINT
347 DataType::TinyInt { .. } => DataType::Custom {
348 name: "SMALLINT".to_string(),
349 },
350
351 // Handle Custom types for Fabric-specific mappings
352 DataType::Custom { ref name } => {
353 let upper = name.to_uppercase();
354
355 // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
356 let (base_name, precision, scale) = Self::parse_type_precision_and_scale(&upper);
357
358 match base_name.as_str() {
359 // DATETIME -> DATETIME2(6)
360 "DATETIME" => DataType::Custom {
361 name: "DATETIME2(6)".to_string(),
362 },
363
364 // SMALLDATETIME -> DATETIME2(6)
365 "SMALLDATETIME" => DataType::Custom {
366 name: "DATETIME2(6)".to_string(),
367 },
368
369 // DATETIME2 -> DATETIME2(6) default, cap at 6
370 "DATETIME2" => {
371 let p = Self::cap_precision(precision, 6);
372 DataType::Custom {
373 name: format!("DATETIME2({})", p),
374 }
375 }
376
377 // DATETIMEOFFSET -> cap precision at 6
378 "DATETIMEOFFSET" => {
379 let p = Self::cap_precision(precision, 6);
380 DataType::Custom {
381 name: format!("DATETIMEOFFSET({})", p),
382 }
383 }
384
385 // TIME -> TIME(6) default, cap at 6
386 "TIME" => {
387 let p = Self::cap_precision(precision, 6);
388 DataType::Custom {
389 name: format!("TIME({})", p),
390 }
391 }
392
393 // TIMESTAMP -> DATETIME2(6)
394 "TIMESTAMP" => DataType::Custom {
395 name: "DATETIME2(6)".to_string(),
396 },
397
398 // TIMESTAMPNTZ -> DATETIME2(6) with precision
399 "TIMESTAMPNTZ" => {
400 let p = Self::cap_precision(precision, 6);
401 DataType::Custom {
402 name: format!("DATETIME2({})", p),
403 }
404 }
405
406 // TIMESTAMPTZ -> DATETIME2(6) with precision
407 "TIMESTAMPTZ" => {
408 let p = Self::cap_precision(precision, 6);
409 DataType::Custom {
410 name: format!("DATETIME2({})", p),
411 }
412 }
413
414 // IMAGE -> VARBINARY
415 "IMAGE" => DataType::Custom {
416 name: "VARBINARY".to_string(),
417 },
418
419 // MONEY -> DECIMAL
420 "MONEY" => DataType::Custom {
421 name: "DECIMAL".to_string(),
422 },
423
424 // SMALLMONEY -> DECIMAL
425 "SMALLMONEY" => DataType::Custom {
426 name: "DECIMAL".to_string(),
427 },
428
429 // NCHAR -> CHAR (with length preserved)
430 "NCHAR" => {
431 if let Some(len) = precision {
432 DataType::Custom {
433 name: format!("CHAR({})", len),
434 }
435 } else {
436 DataType::Custom {
437 name: "CHAR".to_string(),
438 }
439 }
440 }
441
442 // NVARCHAR -> VARCHAR (with length preserved)
443 "NVARCHAR" => {
444 if let Some(len) = precision {
445 DataType::Custom {
446 name: format!("VARCHAR({})", len),
447 }
448 } else {
449 DataType::Custom {
450 name: "VARCHAR".to_string(),
451 }
452 }
453 }
454
455 // TINYINT -> SMALLINT
456 "TINYINT" => DataType::Custom {
457 name: "SMALLINT".to_string(),
458 },
459
460 // UTINYINT -> SMALLINT
461 "UTINYINT" => DataType::Custom {
462 name: "SMALLINT".to_string(),
463 },
464
465 // VARIANT -> SQL_VARIANT
466 "VARIANT" => DataType::Custom {
467 name: "SQL_VARIANT".to_string(),
468 },
469
470 // XML -> VARCHAR
471 "XML" => DataType::Custom {
472 name: "VARCHAR".to_string(),
473 },
474
475 // NUMERIC -> DECIMAL (override TSQL's conversion)
476 // Fabric uses DECIMAL, not NUMERIC
477 "NUMERIC" => {
478 if let (Some(p), Some(s)) = (precision, scale) {
479 DataType::Custom {
480 name: format!("DECIMAL({}, {})", p, s),
481 }
482 } else if let Some(p) = precision {
483 DataType::Custom {
484 name: format!("DECIMAL({})", p),
485 }
486 } else {
487 DataType::Custom {
488 name: "DECIMAL".to_string(),
489 }
490 }
491 }
492
493 // Pass through other custom types unchanged
494 _ => dt,
495 }
496 }
497
498 // Keep all other types as transformed by TSQL
499 other => other,
500 };
501
502 Ok(Expression::DataType(transformed))
503 }
504
505 /// Cap precision to max value, defaulting to max if not specified
506 fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
507 match precision {
508 Some(p) if p > max => max,
509 Some(p) => p,
510 None => max, // Default to max if not specified
511 }
512 }
513
514 /// Parse type name and optional precision/scale from strings like "DATETIME2(7)" or "NUMERIC(10, 2)"
515 fn parse_type_precision_and_scale(name: &str) -> (String, Option<u32>, Option<u32>) {
516 if let Some(paren_pos) = name.find('(') {
517 let base = name[..paren_pos].to_string();
518 let rest = &name[paren_pos + 1..];
519 if let Some(close_pos) = rest.find(')') {
520 let args = &rest[..close_pos];
521 let parts: Vec<&str> = args.split(',').map(|s| s.trim()).collect();
522
523 let precision = parts.first().and_then(|s| s.parse::<u32>().ok());
524 let scale = parts.get(1).and_then(|s| s.parse::<u32>().ok());
525
526 return (base, precision, scale);
527 }
528 (base, None, None)
529 } else {
530 (name.to_string(), None, None)
531 }
532 }
533}