Skip to main content

polyglot_sql/dialects/
dremio.rs

1//! Dremio SQL Dialect
2//!
3//! Dremio-specific SQL dialect based on sqlglot patterns.
4//! Reference: https://docs.dremio.com/current/reference/sql/data-types/
5//!
6//! Key characteristics:
7//! - NULLS LAST is default ordering
8//! - DATE_ADD/DATE_SUB with INTERVAL CAST for non-DAY units
9//! - CURRENT_DATE_UTC for current date in UTC
10//! - ARRAY_GENERATE_RANGE for generating series
11//! - No timezone-aware timestamps
12//! - Comments support: --, //, /* */
13//! - Type mappings: SMALLINT→INT, TINYINT→INT, ARRAY→LIST, etc.
14
15use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{Expression, Function};
18#[cfg(feature = "generate")]
19use crate::generator::GeneratorConfig;
20use crate::tokens::TokenizerConfig;
21
/// Marker type for the Dremio SQL dialect.
///
/// The type carries no data; the dialect's behaviour lives in its trait and
/// inherent `impl` blocks.
pub struct DremioDialect;
24
25impl DialectImpl for DremioDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::Dremio
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // Dremio uses double quotes for identifiers
33        config.identifiers.insert('"', '"');
34        // Dremio supports multiple comment styles: --, //, /* */
35        // Default tokenizer handles -- and /* */
36        config
37    }
38
39    #[cfg(feature = "generate")]
40
41    fn generator_config(&self) -> GeneratorConfig {
42        use crate::generator::IdentifierQuoteStyle;
43        GeneratorConfig {
44            identifier_quote: '"',
45            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
46            dialect: Some(DialectType::Dremio),
47            // Dremio uses singular form for intervals (DAY not DAYS)
48            interval_allows_plural_form: false,
49            // Dremio requires literal values in LIMIT clause
50            limit_only_literals: true,
51            // Dremio doesn't support COUNT(DISTINCT a, b) - needs transformation
52            multi_arg_distinct: false,
53            // Dremio supports BETWEEN SYMMETRIC/ASYMMETRIC
54            supports_between_flags: true,
55            ..Default::default()
56        }
57    }
58
59    #[cfg(feature = "transpile")]
60
61    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
62        match expr {
63            // Generic function transformations
64            Expression::Function(f) => self.transform_function(*f),
65
66            // Aggregate function transformations
67            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
68
69            // Pass through everything else
70            _ => Ok(expr),
71        }
72    }
73}
74
/// Function-renaming helpers backing `transform_expr` (transpile builds only).
#[cfg(feature = "transpile")]
impl DremioDialect {
    /// Maps a generic function call onto its Dremio spelling.
    ///
    /// Names are compared case-insensitively (via upper-casing):
    /// - `GENERATE_SERIES` → `ARRAY_GENERATE_RANGE`
    /// - `DATE_FORMAT` / `TIME_TO_STR` / `STRFTIME` → `TO_CHAR`
    /// - `REGEXP_MATCHES` → `REGEXP_LIKE`
    /// - `REPEATSTR` → `REPEAT`
    ///
    /// Everything else — including the natively supported `TO_DATE`,
    /// `DATE_ADD`, `DATE_SUB`, `DATE_PART`, `EXTRACT` and `DATETYPE` — is
    /// returned unchanged, wrapped back into `Expression::Function`.
    fn transform_function(&self, f: Function) -> Result<Expression> {
        let dremio_name = match f.name.to_uppercase().as_str() {
            "GENERATE_SERIES" => Some("ARRAY_GENERATE_RANGE"),
            "DATE_FORMAT" | "TIME_TO_STR" | "STRFTIME" => Some("TO_CHAR"),
            "REGEXP_MATCHES" => Some("REGEXP_LIKE"),
            "REPEATSTR" => Some("REPEAT"),
            _ => None,
        };

        // A renamed call is rebuilt from the new name and the original
        // arguments; an unmapped call is forwarded exactly as received.
        let call = match dremio_name {
            Some(name) => Function::new(name.to_string(), f.args),
            None => f,
        };
        Ok(Expression::Function(Box::new(call)))
    }

    /// Maps bitwise aggregate calls onto Dremio's `BIT_AND` / `BIT_OR`.
    ///
    /// `BITWISE_AND_AGG` / `BIT_AND_AGG` become `BIT_AND`, and
    /// `BITWISE_OR_AGG` / `BIT_OR_AGG` become `BIT_OR` (case-insensitive).
    /// A mapped call is returned as a plain `Expression::Function`; any other
    /// aggregate is returned unchanged as `Expression::AggregateFunction`.
    fn transform_aggregate_function(
        &self,
        f: Box<crate::expressions::AggregateFunction>,
    ) -> Result<Expression> {
        let dremio_name = match f.name.to_uppercase().as_str() {
            "BITWISE_AND_AGG" | "BIT_AND_AGG" => Some("BIT_AND"),
            "BITWISE_OR_AGG" | "BIT_OR_AGG" => Some("BIT_OR"),
            _ => None,
        };

        Ok(match dremio_name {
            // NOTE(review): only `args` is carried into the rebuilt call; any
            // other state on the aggregate node is not forwarded — confirm
            // this is intended.
            Some(name) => {
                Expression::Function(Box::new(Function::new(name.to_string(), f.args)))
            }
            None => Expression::AggregateFunction(f),
        })
    }
}
146
147// Note: Dremio type mappings (handled in generator if needed):
148// - SMALLINT → INT
149// - TINYINT → INT
150// - BINARY → VARBINARY
151// - TEXT → VARCHAR
152// - NCHAR → VARCHAR
153// - CHAR → VARCHAR
154// - TIMESTAMPNTZ → TIMESTAMP
155// - DATETIME → TIMESTAMP
156// - ARRAY → LIST
157// - BIT → BOOLEAN
158//
159// Dremio does not support timezone-aware TIMESTAMP types