polyglot_sql/dialects/fabric.rs
1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20use crate::generator::GeneratorConfig;
21use crate::tokens::TokenizerConfig;
22
/// Microsoft Fabric Data Warehouse dialect (based on T-SQL).
///
/// The tokenizer configuration is taken from `TSQLDialect`, and expressions that
/// need no Fabric-specific rewrite are first transformed by `TSQLDialect` and then
/// passed through Fabric's own data type mappings.
pub struct FabricDialect;
25
26impl DialectImpl for FabricDialect {
27 fn dialect_type(&self) -> DialectType {
28 DialectType::Fabric
29 }
30
31 fn tokenizer_config(&self) -> TokenizerConfig {
32 // Inherit from T-SQL
33 let tsql = TSQLDialect;
34 tsql.tokenizer_config()
35 }
36
37 fn generator_config(&self) -> GeneratorConfig {
38 use crate::generator::IdentifierQuoteStyle;
39 // Inherit from T-SQL with Fabric dialect type
40 GeneratorConfig {
41 // Use square brackets like T-SQL
42 identifier_quote: '[',
43 identifier_quote_style: IdentifierQuoteStyle::BRACKET,
44 dialect: Some(DialectType::Fabric),
45 null_ordering_supported: false,
46 ..Default::default()
47 }
48 }
49
50 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
51 // Handle CreateTable specially - add default precision of 1 to VARCHAR/CHAR without length
52 // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision
53 if let Expression::CreateTable(mut ct) = expr {
54 for col in &mut ct.columns {
55 match &col.data_type {
56 DataType::VarChar { length: None, .. } => {
57 col.data_type = DataType::VarChar {
58 length: Some(1),
59 parenthesized_length: false,
60 };
61 }
62 DataType::Char { length: None } => {
63 col.data_type = DataType::Char { length: Some(1) };
64 }
65 _ => {}
66 }
67 // Also transform column data types through Fabric's type mappings.
68 // Apply TSQL normalisation first (e.g. BPCHAR → Char), then Fabric-specific.
69 let tsql = TSQLDialect;
70 if let Ok(Expression::DataType(tsql_dt)) =
71 tsql.transform_data_type(col.data_type.clone())
72 {
73 col.data_type = tsql_dt;
74 }
75 if let Expression::DataType(new_dt) =
76 self.transform_fabric_data_type(col.data_type.clone())?
77 {
78 col.data_type = new_dt;
79 }
80 }
81 return Ok(Expression::CreateTable(ct));
82 }
83
84 // Handle DataType::Timestamp specially BEFORE T-SQL transform
85 // because TSQL loses precision info when converting Timestamp to DATETIME2
86 if let Expression::DataType(DataType::Timestamp { precision, .. }) = &expr {
87 let p = FabricDialect::cap_precision(*precision, 6);
88 return Ok(Expression::DataType(DataType::Custom {
89 name: format!("DATETIME2({})", p),
90 }));
91 }
92
93 // Handle DataType::Time specially BEFORE T-SQL transform
94 // to ensure we get default precision of 6
95 if let Expression::DataType(DataType::Time { precision, .. }) = &expr {
96 let p = FabricDialect::cap_precision(*precision, 6);
97 return Ok(Expression::DataType(DataType::Custom {
98 name: format!("TIME({})", p),
99 }));
100 }
101
102 // Handle DataType::Decimal specially BEFORE T-SQL transform
103 // because TSQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL
104 if let Expression::DataType(DataType::Decimal { precision, scale }) = &expr {
105 let name = if let (Some(p), Some(s)) = (precision, scale) {
106 format!("DECIMAL({}, {})", p, s)
107 } else if let Some(p) = precision {
108 format!("DECIMAL({})", p)
109 } else {
110 "DECIMAL".to_string()
111 };
112 return Ok(Expression::DataType(DataType::Custom { name }));
113 }
114
115 // Handle AT TIME ZONE with TIMESTAMPTZ cast
116 // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods
117 // Input: CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
118 // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
119 if let Expression::AtTimeZone(ref at_tz) = expr {
120 // Check if this contains a TIMESTAMPTZ cast
121 if let Expression::Cast(ref inner_cast) = at_tz.this {
122 if let DataType::Timestamp {
123 timezone: true,
124 precision,
125 } = &inner_cast.to
126 {
127 // Get precision, default 6, cap at 6
128 let capped_precision = FabricDialect::cap_precision(*precision, 6);
129
130 // Create inner DATETIMEOFFSET cast
131 let datetimeoffset_cast = Expression::Cast(Box::new(Cast {
132 this: inner_cast.this.clone(),
133 to: DataType::Custom {
134 name: format!("DATETIMEOFFSET({})", capped_precision),
135 },
136 trailing_comments: inner_cast.trailing_comments.clone(),
137 double_colon_syntax: false,
138 format: None,
139 default: None,
140 inferred_type: None,
141 }));
142
143 // Create new AT TIME ZONE with DATETIMEOFFSET
144 let new_at_tz =
145 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
146 this: datetimeoffset_cast,
147 zone: at_tz.zone.clone(),
148 }));
149
150 // Wrap in outer DATETIME2 cast
151 return Ok(Expression::Cast(Box::new(Cast {
152 this: new_at_tz,
153 to: DataType::Custom {
154 name: format!("DATETIME2({})", capped_precision),
155 },
156 trailing_comments: Vec::new(),
157 double_colon_syntax: false,
158 format: None,
159 default: None,
160 inferred_type: None,
161 })));
162 }
163 }
164 }
165
166 // Handle UnixToTime -> DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))
167 // Reference: Python sqlglot Fabric dialect unixtotime_sql
168 if let Expression::UnixToTime(ref f) = expr {
169 // Build: column * 1e6
170 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
171 left: (*f.this).clone(),
172 right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
173 left_comments: Vec::new(),
174 operator_comments: Vec::new(),
175 trailing_comments: Vec::new(),
176 inferred_type: None,
177 }));
178
179 // Build: ROUND(column * 1e6, 0)
180 let round_expr = Expression::Function(Box::new(Function::new(
181 "ROUND".to_string(),
182 vec![
183 column_times_1e6,
184 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
185 ],
186 )));
187
188 // Build: CAST(ROUND(...) AS BIGINT)
189 let cast_to_bigint = Expression::Cast(Box::new(Cast {
190 this: round_expr,
191 to: DataType::BigInt { length: None },
192 trailing_comments: Vec::new(),
193 double_colon_syntax: false,
194 format: None,
195 default: None,
196 inferred_type: None,
197 }));
198
199 // Build: CAST('1970-01-01' AS DATETIME2(6))
200 let epoch_start = Expression::Cast(Box::new(Cast {
201 this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
202 to: DataType::Custom {
203 name: "DATETIME2(6)".to_string(),
204 },
205 trailing_comments: Vec::new(),
206 double_colon_syntax: false,
207 format: None,
208 default: None,
209 inferred_type: None,
210 }));
211
212 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
213 let dateadd = Expression::Function(Box::new(Function::new(
214 "DATEADD".to_string(),
215 vec![
216 Expression::Identifier(Identifier::new("MICROSECONDS")),
217 cast_to_bigint,
218 epoch_start,
219 ],
220 )));
221
222 return Ok(dateadd);
223 }
224
225 // Handle Function named UNIX_TO_TIME (parsed as generic function, not UnixToTime expression)
226 // Reference: Python sqlglot Fabric dialect unixtotime_sql
227 if let Expression::Function(ref f) = expr {
228 if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") && !f.args.is_empty() {
229 let timestamp_input = f.args[0].clone();
230
231 // Build: column * 1e6
232 let column_times_1e6 = Expression::Mul(Box::new(BinaryOp {
233 left: timestamp_input,
234 right: Expression::Literal(Box::new(Literal::Number("1e6".to_string()))),
235 left_comments: Vec::new(),
236 operator_comments: Vec::new(),
237 trailing_comments: Vec::new(),
238 inferred_type: None,
239 }));
240
241 // Build: ROUND(column * 1e6, 0)
242 let round_expr = Expression::Function(Box::new(Function::new(
243 "ROUND".to_string(),
244 vec![
245 column_times_1e6,
246 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
247 ],
248 )));
249
250 // Build: CAST(ROUND(...) AS BIGINT)
251 let cast_to_bigint = Expression::Cast(Box::new(Cast {
252 this: round_expr,
253 to: DataType::BigInt { length: None },
254 trailing_comments: Vec::new(),
255 double_colon_syntax: false,
256 format: None,
257 default: None,
258 inferred_type: None,
259 }));
260
261 // Build: CAST('1970-01-01' AS DATETIME2(6))
262 let epoch_start = Expression::Cast(Box::new(Cast {
263 this: Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
264 to: DataType::Custom {
265 name: "DATETIME2(6)".to_string(),
266 },
267 trailing_comments: Vec::new(),
268 double_colon_syntax: false,
269 format: None,
270 default: None,
271 inferred_type: None,
272 }));
273
274 // Build: DATEADD(MICROSECONDS, cast_to_bigint, epoch_start)
275 let dateadd = Expression::Function(Box::new(Function::new(
276 "DATEADD".to_string(),
277 vec![
278 Expression::Identifier(Identifier::new("MICROSECONDS")),
279 cast_to_bigint,
280 epoch_start,
281 ],
282 )));
283
284 return Ok(dateadd);
285 }
286 }
287
288 // Delegate to T-SQL for other transformations
289 let tsql = TSQLDialect;
290 let transformed = tsql.transform_expr(expr)?;
291
292 // Apply Fabric-specific transformations to the result
293 self.transform_fabric_expr(transformed)
294 }
295}
296
297impl FabricDialect {
298 /// Fabric-specific expression transformations
299 fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
300 match expr {
301 // Handle DataType expressions with Fabric-specific type mappings
302 Expression::DataType(dt) => self.transform_fabric_data_type(dt),
303
304 // Pass through everything else
305 _ => Ok(expr),
306 }
307 }
308
309 /// Transform data types according to Fabric TYPE_MAPPING
310 /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
311 fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
312 let transformed = match dt {
313 // TIMESTAMP -> DATETIME2(6) with precision handling
314 // Note: TSQL already converts this to DATETIME2, but without precision
315 DataType::Timestamp { precision, .. } => {
316 let p = Self::cap_precision(precision, 6);
317 DataType::Custom {
318 name: format!("DATETIME2({})", p),
319 }
320 }
321
322 // TIME -> TIME(6) default, capped at 6
323 DataType::Time { precision, .. } => {
324 let p = Self::cap_precision(precision, 6);
325 DataType::Custom {
326 name: format!("TIME({})", p),
327 }
328 }
329
330 // INT -> INT (override TSQL which may output INTEGER)
331 DataType::Int { .. } => DataType::Custom {
332 name: "INT".to_string(),
333 },
334
335 // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
336 DataType::Decimal { precision, scale } => {
337 if let (Some(p), Some(s)) = (&precision, &scale) {
338 DataType::Custom {
339 name: format!("DECIMAL({}, {})", p, s),
340 }
341 } else if let Some(p) = &precision {
342 DataType::Custom {
343 name: format!("DECIMAL({})", p),
344 }
345 } else {
346 DataType::Custom {
347 name: "DECIMAL".to_string(),
348 }
349 }
350 }
351
352 // JSON -> VARCHAR
353 DataType::Json => DataType::Custom {
354 name: "VARCHAR".to_string(),
355 },
356
357 // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
358 DataType::Uuid => DataType::Custom {
359 name: "UNIQUEIDENTIFIER".to_string(),
360 },
361
362 // TinyInt -> SMALLINT
363 DataType::TinyInt { .. } => DataType::Custom {
364 name: "SMALLINT".to_string(),
365 },
366
367 // Handle Custom types for Fabric-specific mappings
368 DataType::Custom { ref name } => {
369 let upper = name.to_uppercase();
370
371 // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
372 let (base_name, precision, scale) =
373 TSQLDialect::parse_type_precision_and_scale(&upper);
374
375 match base_name.as_str() {
376 // DATETIME -> DATETIME2(6)
377 "DATETIME" => DataType::Custom {
378 name: "DATETIME2(6)".to_string(),
379 },
380
381 // SMALLDATETIME -> DATETIME2(6)
382 "SMALLDATETIME" => DataType::Custom {
383 name: "DATETIME2(6)".to_string(),
384 },
385
386 // DATETIME2 -> DATETIME2(6) default, cap at 6
387 "DATETIME2" => {
388 let p = Self::cap_precision(precision, 6);
389 DataType::Custom {
390 name: format!("DATETIME2({})", p),
391 }
392 }
393
394 // DATETIMEOFFSET -> cap precision at 6
395 "DATETIMEOFFSET" => {
396 let p = Self::cap_precision(precision, 6);
397 DataType::Custom {
398 name: format!("DATETIMEOFFSET({})", p),
399 }
400 }
401
402 // TIME -> TIME(6) default, cap at 6
403 "TIME" => {
404 let p = Self::cap_precision(precision, 6);
405 DataType::Custom {
406 name: format!("TIME({})", p),
407 }
408 }
409
410 // TIMESTAMP -> DATETIME2(6)
411 "TIMESTAMP" => DataType::Custom {
412 name: "DATETIME2(6)".to_string(),
413 },
414
415 // TIMESTAMPNTZ -> DATETIME2(6) with precision
416 "TIMESTAMPNTZ" => {
417 let p = Self::cap_precision(precision, 6);
418 DataType::Custom {
419 name: format!("DATETIME2({})", p),
420 }
421 }
422
423 // TIMESTAMPTZ -> DATETIME2(6) with precision
424 "TIMESTAMPTZ" => {
425 let p = Self::cap_precision(precision, 6);
426 DataType::Custom {
427 name: format!("DATETIME2({})", p),
428 }
429 }
430
431 // IMAGE -> VARBINARY
432 "IMAGE" => DataType::Custom {
433 name: "VARBINARY".to_string(),
434 },
435
436 // MONEY -> DECIMAL
437 "MONEY" => DataType::Custom {
438 name: "DECIMAL".to_string(),
439 },
440
441 // SMALLMONEY -> DECIMAL
442 "SMALLMONEY" => DataType::Custom {
443 name: "DECIMAL".to_string(),
444 },
445
446 // NCHAR -> CHAR (with length preserved)
447 "NCHAR" => {
448 if let Some(len) = precision {
449 DataType::Custom {
450 name: format!("CHAR({})", len),
451 }
452 } else {
453 DataType::Custom {
454 name: "CHAR".to_string(),
455 }
456 }
457 }
458
459 // NVARCHAR -> VARCHAR (with length preserved)
460 "NVARCHAR" => {
461 if let Some(len) = precision {
462 DataType::Custom {
463 name: format!("VARCHAR({})", len),
464 }
465 } else {
466 DataType::Custom {
467 name: "VARCHAR".to_string(),
468 }
469 }
470 }
471
472 // TINYINT -> SMALLINT
473 "TINYINT" => DataType::Custom {
474 name: "SMALLINT".to_string(),
475 },
476
477 // UTINYINT -> SMALLINT
478 "UTINYINT" => DataType::Custom {
479 name: "SMALLINT".to_string(),
480 },
481
482 // VARIANT -> SQL_VARIANT
483 "VARIANT" => DataType::Custom {
484 name: "SQL_VARIANT".to_string(),
485 },
486
487 // XML -> VARCHAR
488 "XML" => DataType::Custom {
489 name: "VARCHAR".to_string(),
490 },
491
492 // NUMERIC -> DECIMAL (override TSQL's conversion)
493 // Fabric uses DECIMAL, not NUMERIC
494 "NUMERIC" => {
495 if let (Some(p), Some(s)) = (precision, scale) {
496 DataType::Custom {
497 name: format!("DECIMAL({}, {})", p, s),
498 }
499 } else if let Some(p) = precision {
500 DataType::Custom {
501 name: format!("DECIMAL({})", p),
502 }
503 } else {
504 DataType::Custom {
505 name: "DECIMAL".to_string(),
506 }
507 }
508 }
509
510 // Pass through other custom types unchanged
511 _ => dt,
512 }
513 }
514
515 // Keep all other types as transformed by TSQL
516 other => other,
517 };
518
519 Ok(Expression::DataType(transformed))
520 }
521
522 /// Cap precision to max value, defaulting to max if not specified
523 fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
524 match precision {
525 Some(p) if p > max => max,
526 Some(p) => p,
527 None => max, // Default to max if not specified
528 }
529 }
530}