polyglot_sql/dialects/fabric.rs
1//! Microsoft Fabric Data Warehouse Dialect
2//!
3//! Fabric-specific SQL dialect based on sqlglot patterns.
4//! Fabric inherits from T-SQL with specific differences.
5//!
6//! References:
7//! - Data Types: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
8//! - T-SQL Surface Area: https://learn.microsoft.com/en-us/fabric/data-warehouse/tsql-surface-area
9//!
10//! Key differences from T-SQL:
11//! - Case-sensitive identifiers (unlike T-SQL which is case-insensitive)
12//! - Limited data type support with mappings to supported alternatives
13//! - Temporal types (DATETIME2, DATETIMEOFFSET, TIME) limited to 6 digits precision
14//! - Certain legacy types (MONEY, SMALLMONEY, etc.) are not supported
15//! - Unicode types (NCHAR, NVARCHAR) are mapped to non-unicode equivalents
16
17use super::{DialectImpl, DialectType, TSQLDialect};
18use crate::error::Result;
19use crate::expressions::{BinaryOp, Cast, DataType, Expression, Function, Identifier, Literal};
20#[cfg(feature = "generate")]
21use crate::generator::GeneratorConfig;
22use crate::tokens::TokenizerConfig;
23
/// Microsoft Fabric Data Warehouse dialect (based on T-SQL).
///
/// This is a stateless marker type: all behaviour lives in its [`DialectImpl`]
/// implementation, which reuses the T-SQL tokenizer and layers Fabric-specific
/// generator settings and data type rewrites on top of the T-SQL transforms.
///
/// The common traits are derived so the dialect can be logged, copied and
/// default-constructed like any other zero-sized value.
#[derive(Debug, Clone, Copy, Default)]
pub struct FabricDialect;
26
27impl DialectImpl for FabricDialect {
28 fn dialect_type(&self) -> DialectType {
29 DialectType::Fabric
30 }
31
32 fn tokenizer_config(&self) -> TokenizerConfig {
33 // Inherit from T-SQL
34 let tsql = TSQLDialect;
35 tsql.tokenizer_config()
36 }
37
/// Returns the SQL generator configuration for Fabric.
///
/// Only the fields listed below are overridden; every other setting keeps
/// the value from `GeneratorConfig::default()`.
#[cfg(feature = "generate")]
fn generator_config(&self) -> GeneratorConfig {
    GeneratorConfig {
        // Tag generated output with the Fabric dialect.
        dialect: Some(DialectType::Fabric),

        // Identifiers are quoted with square brackets, like T-SQL.
        identifier_quote: '[',
        identifier_quote_style: crate::generator::IdentifierQuoteStyle::BRACKET,

        // Feature switches turned off for this dialect.
        null_ordering_supported: false,
        aggregate_filter_supported: false,
        cte_recursive_keyword_required: false,

        ..Default::default()
    }
}
54
/// Rewrites a single expression into its Fabric form.
///
/// A handful of shapes are intercepted *before* the T-SQL transform runs;
/// everything else is delegated to `TSQLDialect::transform_expr` and the
/// result is then passed through `transform_fabric_expr`.
///
/// Shapes handled here:
/// - `CREATE TABLE`: `VARCHAR`/`CHAR` columns without a length get length 1,
///   then each column type goes through the T-SQL and Fabric type mappings.
/// - `DataType::Timestamp` / `DataType::Time`: emitted as `DATETIME2(p)` /
///   `TIME(p)` with the precision defaulted to and capped at 6.
/// - `DataType::Decimal`: kept as `DECIMAL` (the T-SQL transform would turn
///   it into `NUMERIC`).
/// - `CAST(x AS TIMESTAMPTZ) AT TIME ZONE z`: becomes
///   `CAST(CAST(x AS DATETIMEOFFSET(p)) AT TIME ZONE z AS DATETIME2(p))`.
/// - `UnixToTime` and the generic function `UNIX_TO_TIME(x, ...)`: become
///   `DATEADD(MICROSECONDS, CAST(ROUND(x * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))`.
///
/// # Errors
///
/// Propagates any error returned by `transform_fabric_data_type`, by the
/// T-SQL `transform_expr`, or by `transform_fabric_expr`. Errors from the
/// T-SQL column type normalisation inside `CREATE TABLE` are deliberately
/// ignored (the column keeps its current type in that case).
#[cfg(feature = "transpile")]
fn transform_expr(&self, expr: Expression) -> Result<Expression> {
    /// Builds a bare `CAST(this AS to)` node: no comments, no `::` syntax,
    /// no FORMAT / DEFAULT clause and no inferred type.
    fn plain_cast(this: Expression, to: DataType) -> Cast {
        Cast {
            this,
            to,
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }
    }

    /// Builds
    /// `DATEADD(MICROSECONDS, CAST(ROUND(input * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))`.
    ///
    /// Shared by the `UnixToTime` expression and the generic `UNIX_TO_TIME`
    /// function so both produce exactly the same tree.
    /// NOTE(review): `input` is presumably a Unix timestamp in seconds — confirm.
    fn unix_to_time_dateadd(input: Expression) -> Expression {
        let number =
            |text: &str| Expression::Literal(Box::new(Literal::Number(text.to_string())));

        // input * 1e6
        let scaled = Expression::Mul(Box::new(BinaryOp {
            left: input,
            right: number("1e6"),
            left_comments: Vec::new(),
            operator_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        }));

        // ROUND(input * 1e6, 0)
        let rounded = Expression::Function(Box::new(Function::new(
            "ROUND".to_string(),
            vec![scaled, number("0")],
        )));

        // CAST(ROUND(...) AS BIGINT)
        let offset = Expression::Cast(Box::new(plain_cast(
            rounded,
            DataType::BigInt { length: None },
        )));

        // CAST('1970-01-01' AS DATETIME2(6))
        let epoch_start = Expression::Cast(Box::new(plain_cast(
            Expression::Literal(Box::new(Literal::String("1970-01-01".to_string()))),
            DataType::Custom {
                name: "DATETIME2(6)".to_string(),
            },
        )));

        // DATEADD(MICROSECONDS, offset, epoch_start)
        Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::Identifier(Identifier::new("MICROSECONDS")),
                offset,
                epoch_start,
            ],
        )))
    }

    // CREATE TABLE: add a default length of 1 to VARCHAR/CHAR without length.
    // Reference: Python sqlglot Fabric dialect parser._parse_create adds default precision.
    if let Expression::CreateTable(mut ct) = expr {
        let tsql = TSQLDialect;
        for col in &mut ct.columns {
            match &col.data_type {
                DataType::VarChar { length: None, .. } => {
                    col.data_type = DataType::VarChar {
                        length: Some(1),
                        parenthesized_length: false,
                    };
                }
                DataType::Char { length: None } => {
                    col.data_type = DataType::Char { length: Some(1) };
                }
                _ => {}
            }

            // Apply T-SQL normalisation first (e.g. BPCHAR -> Char). This step is
            // best-effort: on error the column simply keeps its current type.
            if let Ok(Expression::DataType(tsql_dt)) =
                tsql.transform_data_type(col.data_type.clone())
            {
                col.data_type = tsql_dt;
            }

            // Then apply the Fabric-specific type mapping; errors here propagate.
            if let Expression::DataType(new_dt) =
                self.transform_fabric_data_type(col.data_type.clone())?
            {
                col.data_type = new_dt;
            }
        }
        return Ok(Expression::CreateTable(ct));
    }

    // Data types that must be rewritten BEFORE the T-SQL transform sees them.
    if let Expression::DataType(dt) = &expr {
        match dt {
            // T-SQL would convert Timestamp to DATETIME2 and lose the precision.
            DataType::Timestamp { precision, .. } => {
                let p = Self::cap_precision(*precision, 6);
                return Ok(Expression::DataType(DataType::Custom {
                    name: format!("DATETIME2({})", p),
                }));
            }
            // Ensure TIME always carries a precision (default 6, capped at 6).
            DataType::Time { precision, .. } => {
                let p = Self::cap_precision(*precision, 6);
                return Ok(Expression::DataType(DataType::Custom {
                    name: format!("TIME({})", p),
                }));
            }
            // T-SQL converts DECIMAL to NUMERIC, but Fabric wants DECIMAL.
            DataType::Decimal { precision, scale } => {
                let name = Self::decimal_type_name(*precision, *scale);
                return Ok(Expression::DataType(DataType::Custom { name }));
            }
            _ => {}
        }
    }

    // AT TIME ZONE applied to a TIMESTAMPTZ cast.
    // Reference: Python sqlglot Fabric dialect cast_sql and attimezone_sql methods.
    // Input:  CAST(x AS TIMESTAMPTZ) AT TIME ZONE 'Pacific Standard Time'
    // Output: CAST(CAST(x AS DATETIMEOFFSET(6)) AT TIME ZONE 'Pacific Standard Time' AS DATETIME2(6))
    if let Expression::AtTimeZone(ref at_tz) = expr {
        if let Expression::Cast(ref inner_cast) = at_tz.this {
            if let DataType::Timestamp {
                timezone: true,
                precision,
            } = &inner_cast.to
            {
                // Precision defaults to 6 and is capped at 6.
                let capped_precision = Self::cap_precision(*precision, 6);

                // Inner cast keeps the comments attached to the original cast.
                let mut offset_cast = plain_cast(
                    inner_cast.this.clone(),
                    DataType::Custom {
                        name: format!("DATETIMEOFFSET({})", capped_precision),
                    },
                );
                offset_cast.trailing_comments = inner_cast.trailing_comments.clone();

                let shifted = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                    this: Expression::Cast(Box::new(offset_cast)),
                    zone: at_tz.zone.clone(),
                }));

                // Outer cast back to DATETIME2 with the same precision.
                return Ok(Expression::Cast(Box::new(plain_cast(
                    shifted,
                    DataType::Custom {
                        name: format!("DATETIME2({})", capped_precision),
                    },
                ))));
            }
        }
    }

    // UnixToTime expression.
    // Reference: Python sqlglot Fabric dialect unixtotime_sql.
    if let Expression::UnixToTime(ref f) = expr {
        return Ok(unix_to_time_dateadd((*f.this).clone()));
    }

    // UNIX_TO_TIME parsed as a generic function rather than a UnixToTime
    // expression. Only the first argument is used; a call without arguments
    // falls through to the T-SQL transform untouched.
    if let Expression::Function(ref f) = expr {
        if f.name.eq_ignore_ascii_case("UNIX_TO_TIME") {
            if let Some(first_arg) = f.args.first() {
                return Ok(unix_to_time_dateadd(first_arg.clone()));
            }
        }
    }

    // Delegate to T-SQL for everything else, then apply the Fabric-specific
    // post-processing to whatever T-SQL produced.
    let transformed = TSQLDialect.transform_expr(expr)?;
    self.transform_fabric_expr(transformed)
}
296}
297
298#[cfg(feature = "transpile")]
299impl FabricDialect {
300 /// Fabric-specific expression transformations
301 fn transform_fabric_expr(&self, expr: Expression) -> Result<Expression> {
302 match expr {
303 // Handle DataType expressions with Fabric-specific type mappings
304 Expression::DataType(dt) => self.transform_fabric_data_type(dt),
305
306 // Pass through everything else
307 _ => Ok(expr),
308 }
309 }
310
311 /// Transform data types according to Fabric TYPE_MAPPING
312 /// Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
313 fn transform_fabric_data_type(&self, dt: DataType) -> Result<Expression> {
314 let transformed = match dt {
315 // TIMESTAMP -> DATETIME2(6) with precision handling
316 // Note: TSQL already converts this to DATETIME2, but without precision
317 DataType::Timestamp { precision, .. } => {
318 let p = Self::cap_precision(precision, 6);
319 DataType::Custom {
320 name: format!("DATETIME2({})", p),
321 }
322 }
323
324 // TIME -> TIME(6) default, capped at 6
325 DataType::Time { precision, .. } => {
326 let p = Self::cap_precision(precision, 6);
327 DataType::Custom {
328 name: format!("TIME({})", p),
329 }
330 }
331
332 // INT -> INT (override TSQL which may output INTEGER)
333 DataType::Int { .. } => DataType::Custom {
334 name: "INT".to_string(),
335 },
336
337 // DECIMAL -> DECIMAL (override TSQL which converts to NUMERIC)
338 DataType::Decimal { precision, scale } => DataType::Custom {
339 name: Self::decimal_type_name(precision, scale),
340 },
341
342 // JSON -> VARCHAR
343 DataType::Json => DataType::Custom {
344 name: "VARCHAR".to_string(),
345 },
346
347 // UUID -> UNIQUEIDENTIFIER (already handled by TSQL, but ensure it's here)
348 DataType::Uuid => DataType::Custom {
349 name: "UNIQUEIDENTIFIER".to_string(),
350 },
351
352 // TinyInt -> SMALLINT
353 DataType::TinyInt { .. } => DataType::Custom {
354 name: "SMALLINT".to_string(),
355 },
356
357 // Handle Custom types for Fabric-specific mappings
358 DataType::Custom { ref name } => {
359 let upper = name.to_uppercase();
360
361 // Parse out precision and scale if present: "TYPENAME(n)" or "TYPENAME(n, m)"
362 let (base_name, precision, scale) =
363 TSQLDialect::parse_type_precision_and_scale(&upper);
364 let has_max_length = upper.contains("(MAX)");
365
366 match base_name.as_str() {
367 // DATETIME -> DATETIME2(6)
368 "DATETIME" => DataType::Custom {
369 name: "DATETIME2(6)".to_string(),
370 },
371
372 // SMALLDATETIME -> DATETIME2(6)
373 "SMALLDATETIME" => DataType::Custom {
374 name: "DATETIME2(6)".to_string(),
375 },
376
377 // DATETIME2 -> DATETIME2(6) default, cap at 6
378 "DATETIME2" => {
379 let p = Self::cap_precision(precision, 6);
380 DataType::Custom {
381 name: format!("DATETIME2({})", p),
382 }
383 }
384
385 // DATETIMEOFFSET -> cap precision at 6
386 "DATETIMEOFFSET" => {
387 let p = Self::cap_precision(precision, 6);
388 DataType::Custom {
389 name: format!("DATETIMEOFFSET({})", p),
390 }
391 }
392
393 // TIME -> TIME(6) default, cap at 6
394 "TIME" => {
395 let p = Self::cap_precision(precision, 6);
396 DataType::Custom {
397 name: format!("TIME({})", p),
398 }
399 }
400
401 // TIMESTAMP -> DATETIME2(6)
402 "TIMESTAMP" => DataType::Custom {
403 name: "DATETIME2(6)".to_string(),
404 },
405
406 // TIMESTAMPNTZ -> DATETIME2(6) with precision
407 "TIMESTAMPNTZ" => {
408 let p = Self::cap_precision(precision, 6);
409 DataType::Custom {
410 name: format!("DATETIME2({})", p),
411 }
412 }
413
414 // TIMESTAMPTZ -> DATETIME2(6) with precision
415 "TIMESTAMPTZ" => {
416 let p = Self::cap_precision(precision, 6);
417 DataType::Custom {
418 name: format!("DATETIME2({})", p),
419 }
420 }
421
422 // IMAGE -> VARBINARY
423 "IMAGE" => DataType::Custom {
424 name: "VARBINARY".to_string(),
425 },
426
427 // MONEY -> DECIMAL
428 "MONEY" => DataType::Custom {
429 name: "DECIMAL".to_string(),
430 },
431
432 // SMALLMONEY -> DECIMAL
433 "SMALLMONEY" => DataType::Custom {
434 name: "DECIMAL".to_string(),
435 },
436
437 // NCHAR -> CHAR (with length preserved)
438 "NCHAR" => {
439 if has_max_length {
440 DataType::Custom {
441 name: "CHAR(MAX)".to_string(),
442 }
443 } else if let Some(len) = precision {
444 DataType::Custom {
445 name: format!("CHAR({})", len),
446 }
447 } else {
448 DataType::Custom {
449 name: "CHAR".to_string(),
450 }
451 }
452 }
453
454 // NVARCHAR -> VARCHAR (with length preserved)
455 "NVARCHAR" => {
456 if has_max_length {
457 DataType::Custom {
458 name: "VARCHAR(MAX)".to_string(),
459 }
460 } else if let Some(len) = precision {
461 DataType::Custom {
462 name: format!("VARCHAR({})", len),
463 }
464 } else {
465 DataType::Custom {
466 name: "VARCHAR".to_string(),
467 }
468 }
469 }
470
471 // TINYINT -> SMALLINT
472 "TINYINT" => DataType::Custom {
473 name: "SMALLINT".to_string(),
474 },
475
476 // UTINYINT -> SMALLINT
477 "UTINYINT" => DataType::Custom {
478 name: "SMALLINT".to_string(),
479 },
480
481 // VARIANT -> SQL_VARIANT
482 "VARIANT" => DataType::Custom {
483 name: "SQL_VARIANT".to_string(),
484 },
485
486 // XML -> VARCHAR
487 "XML" => DataType::Custom {
488 name: "VARCHAR".to_string(),
489 },
490
491 // NUMERIC -> DECIMAL (override TSQL's conversion)
492 // Fabric uses DECIMAL, not NUMERIC
493 "DECIMAL" | "NUMERIC" => DataType::Custom {
494 name: Self::decimal_type_name(precision, scale),
495 },
496
497 // Pass through other custom types unchanged
498 _ => dt,
499 }
500 }
501
502 // Keep all other types as transformed by TSQL
503 other => other,
504 };
505
506 Ok(Expression::DataType(transformed))
507 }
508
/// Clamps an optional precision to at most `max`.
///
/// Returns `max` when no precision is given, otherwise the smaller of the
/// given precision and `max`.
fn cap_precision(precision: Option<u32>, max: u32) -> u32 {
    precision.map_or(max, |requested| requested.min(max))
}
517
/// Renders the `DECIMAL` type name for the given precision and scale.
///
/// Produces `DECIMAL(p, s)` when both are present, `DECIMAL(p)` when only the
/// precision is present, and plain `DECIMAL` when there is no precision
/// (a scale without a precision is dropped).
fn decimal_type_name(precision: Option<u32>, scale: Option<u32>) -> String {
    let mut rendered = String::from("DECIMAL");
    if let Some(digits) = precision {
        let arguments = match scale {
            Some(fraction) => format!("({}, {})", digits, fraction),
            None => format!("({})", digits),
        };
        rendered.push_str(&arguments);
    }
    rendered
}
525}