Skip to main content

polyglot_sql/dialects/
dremio.rs

1//! Dremio SQL Dialect
2//!
3//! Dremio-specific SQL dialect based on sqlglot patterns.
4//! Reference: https://docs.dremio.com/current/reference/sql/data-types/
5//!
6//! Key characteristics:
7//! - NULLS LAST is default ordering
8//! - DATE_ADD/DATE_SUB with INTERVAL CAST for non-DAY units
9//! - CURRENT_DATE_UTC for current date in UTC
10//! - ARRAY_GENERATE_RANGE for generating series
11//! - No timezone-aware timestamps
12//! - Comments support: --, //, /* */
13//! - Type mappings: SMALLINT→INT, TINYINT→INT, ARRAY→LIST, etc.
14
15use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{Expression, Function};
18use crate::generator::GeneratorConfig;
19use crate::tokens::TokenizerConfig;
20
/// Dremio dialect.
///
/// A field-less marker type: all behaviour comes from its `DialectImpl`
/// implementation and its inherent rewrite helpers. The derived traits make
/// the marker printable in diagnostics, freely copyable, and constructible
/// through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct DremioDialect;
23
24impl DialectImpl for DremioDialect {
25    fn dialect_type(&self) -> DialectType {
26        DialectType::Dremio
27    }
28
29    fn tokenizer_config(&self) -> TokenizerConfig {
30        let mut config = TokenizerConfig::default();
31        // Dremio uses double quotes for identifiers
32        config.identifiers.insert('"', '"');
33        // Dremio supports multiple comment styles: --, //, /* */
34        // Default tokenizer handles -- and /* */
35        config
36    }
37
38    fn generator_config(&self) -> GeneratorConfig {
39        use crate::generator::IdentifierQuoteStyle;
40        GeneratorConfig {
41            identifier_quote: '"',
42            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
43            dialect: Some(DialectType::Dremio),
44            // Dremio uses singular form for intervals (DAY not DAYS)
45            interval_allows_plural_form: false,
46            // Dremio requires literal values in LIMIT clause
47            limit_only_literals: true,
48            // Dremio doesn't support COUNT(DISTINCT a, b) - needs transformation
49            multi_arg_distinct: false,
50            // Dremio supports BETWEEN SYMMETRIC/ASYMMETRIC
51            supports_between_flags: true,
52            ..Default::default()
53        }
54    }
55
56    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
57        match expr {
58            // Generic function transformations
59            Expression::Function(f) => self.transform_function(*f),
60
61            // Aggregate function transformations
62            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
63
64            // Pass through everything else
65            _ => Ok(expr),
66        }
67    }
68}
69
70impl DremioDialect {
71    fn transform_function(&self, f: Function) -> Result<Expression> {
72        let name_upper = f.name.to_uppercase();
73        match name_upper.as_str() {
74            // GenerateSeries → ARRAY_GENERATE_RANGE
75            "GENERATE_SERIES" => Ok(Expression::Function(Box::new(Function::new(
76                "ARRAY_GENERATE_RANGE".to_string(),
77                f.args,
78            )))),
79
80            // TimeToStr → TO_CHAR
81            "DATE_FORMAT" | "TIME_TO_STR" | "STRFTIME" => {
82                Ok(Expression::Function(Box::new(Function::new(
83                    "TO_CHAR".to_string(),
84                    f.args,
85                ))))
86            }
87
88            // TO_DATE is native
89            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
90
91            // DATE_ADD is native (with interval cast for non-day units)
92            "DATE_ADD" => Ok(Expression::Function(Box::new(f))),
93
94            // DATE_SUB is native (with interval cast for non-day units)
95            "DATE_SUB" => Ok(Expression::Function(Box::new(f))),
96
97            // REGEXP_MATCHES → REGEXP_LIKE (Dremio uses REGEXP_LIKE)
98            "REGEXP_MATCHES" => Ok(Expression::Function(Box::new(Function::new(
99                "REGEXP_LIKE".to_string(),
100                f.args,
101            )))),
102
103            // REPEATSTR → REPEAT (Dremio uses REPEAT, not REPEATSTR)
104            "REPEATSTR" => Ok(Expression::Function(Box::new(Function::new(
105                "REPEAT".to_string(),
106                f.args,
107            )))),
108
109            // DATE_PART → DATE_PART (native, same as EXTRACT)
110            "DATE_PART" | "EXTRACT" => Ok(Expression::Function(Box::new(f))),
111
112            // DATETYPE constructor for date literals
113            "DATETYPE" => Ok(Expression::Function(Box::new(f))),
114
115            // Pass through everything else
116            _ => Ok(Expression::Function(Box::new(f))),
117        }
118    }
119
120    fn transform_aggregate_function(
121        &self,
122        f: Box<crate::expressions::AggregateFunction>,
123    ) -> Result<Expression> {
124        let name_upper = f.name.to_uppercase();
125        match name_upper.as_str() {
126            // BitwiseAndAgg → BIT_AND
127            "BITWISE_AND_AGG" | "BIT_AND_AGG" => {
128                Ok(Expression::Function(Box::new(Function::new(
129                    "BIT_AND".to_string(),
130                    f.args,
131                ))))
132            }
133
134            // BitwiseOrAgg → BIT_OR
135            "BITWISE_OR_AGG" | "BIT_OR_AGG" => {
136                Ok(Expression::Function(Box::new(Function::new(
137                    "BIT_OR".to_string(),
138                    f.args,
139                ))))
140            }
141
142            // Pass through everything else
143            _ => Ok(Expression::AggregateFunction(f)),
144        }
145    }
146}
147
148// Note: Dremio type mappings (handled in generator if needed):
149// - SMALLINT → INT
150// - TINYINT → INT
151// - BINARY → VARBINARY
152// - TEXT → VARCHAR
153// - NCHAR → VARCHAR
154// - CHAR → VARCHAR
155// - TIMESTAMPNTZ → TIMESTAMP
156// - DATETIME → TIMESTAMP
157// - ARRAY → LIST
158// - BIT → BOOLEAN
159//
160// Dremio does not support timezone-aware TIMESTAMP types