// polyglot_sql/dialects/drill.rs

//! Apache Drill SQL Dialect
//!
//! Drill-specific SQL dialect based on sqlglot patterns.
//!
//! Key characteristics:
//! - Uses backticks for identifiers
//! - Backslash string escapes
//! - No TRY_CAST support (must use CAST)
//! - NULLS LAST is default ordering
//! - Functions: REPEATED_COUNT (array size), REPEATED_CONTAINS (array contains)
//! - POW for power function
//! - Date format: 'yyyy-MM-dd'
//! - Type mappings: INT→INTEGER, TEXT→VARCHAR, etc.
14
15use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{Expression, Function};
18#[cfg(feature = "generate")]
19use crate::generator::{GeneratorConfig, NormalizeFunctions};
20use crate::tokens::TokenizerConfig;
21
/// Marker type for the Apache Drill SQL dialect.
///
/// This is a field-less unit struct; everything dialect-specific is provided
/// through its `DialectImpl` implementation.
pub struct DrillDialect;
24
25impl DialectImpl for DrillDialect {
26    fn dialect_type(&self) -> DialectType {
27        DialectType::Drill
28    }
29
30    fn tokenizer_config(&self) -> TokenizerConfig {
31        let mut config = TokenizerConfig::default();
32        // Drill uses backticks for identifiers
33        config.identifiers.insert('`', '`');
34        config
35    }
36
37    #[cfg(feature = "generate")]
38
39    fn generator_config(&self) -> GeneratorConfig {
40        use crate::generator::IdentifierQuoteStyle;
41        GeneratorConfig {
42            identifier_quote: '`',
43            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
44            dialect: Some(DialectType::Drill),
45            // Drill: NORMALIZE_FUNCTIONS = False, PRESERVE_ORIGINAL_NAMES = True
46            normalize_functions: NormalizeFunctions::None,
47            ..Default::default()
48        }
49    }
50
51    #[cfg(feature = "transpile")]
52
53    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
54        match expr {
55            // TRY_CAST → CAST in Drill (no TRY_CAST support)
56            Expression::TryCast(c) => Ok(Expression::Cast(c)),
57
58            // SafeCast → CAST in Drill
59            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
60
61            // CURRENT_TIMESTAMP without parentheses
62            Expression::CurrentTimestamp(_) => Ok(Expression::CurrentTimestamp(
63                crate::expressions::CurrentTimestamp {
64                    precision: None,
65                    sysdate: false,
66                },
67            )),
68
69            // ILIKE → `ILIKE` (backtick quoted function in Drill)
70            // Drill supports ILIKE but it needs to be backtick-quoted
71            Expression::ILike(op) => {
72                // Just pass through - Drill supports ILIKE
73                Ok(Expression::ILike(op))
74            }
75
76            // Power → POW in Drill
77            Expression::Power(op) => Ok(Expression::Function(Box::new(Function::new(
78                "POW".to_string(),
79                vec![op.this, op.expression],
80            )))),
81
82            // ArrayContains → REPEATED_CONTAINS in Drill
83            Expression::ArrayContains(f) => Ok(Expression::Function(Box::new(Function::new(
84                "REPEATED_CONTAINS".to_string(),
85                vec![f.this, f.expression],
86            )))),
87
88            // Generic function transformations
89            Expression::Function(f) => self.transform_function(*f),
90
91            // Pass through everything else
92            _ => Ok(expr),
93        }
94    }
95}
96
#[cfg(feature = "transpile")]
impl DrillDialect {
    /// Maps a generic function call onto the name Drill uses for it.
    ///
    /// The function name is compared after upper-casing, so matching is
    /// case-insensitive. Three outcomes are possible:
    ///
    /// - `CURRENT_TIMESTAMP` becomes an `Expression::CurrentTimestamp` node
    ///   with no precision and `sysdate` unset; its arguments are discarded.
    /// - A name in the rename table below yields a new `Function` built by
    ///   `Function::new` from the Drill name and the original arguments, in
    ///   their original order.
    /// - Any other name (including `TO_TIMESTAMP`, `TO_DATE`, `UNIX_TIMESTAMP`,
    ///   `FROM_UNIXTIME`, `DATE_ADD`, `DATE_SUB` and `STRPOS`) is returned
    ///   exactly as received.
    ///
    /// This method always returns `Ok`.
    fn transform_function(&self, f: Function) -> Result<Expression> {
        let upper = f.name.to_uppercase();

        let drill_name = match upper.as_str() {
            "CURRENT_TIMESTAMP" => {
                return Ok(Expression::CurrentTimestamp(
                    crate::expressions::CurrentTimestamp {
                        precision: None,
                        sysdate: false,
                    },
                ));
            }

            // Array size.
            "ARRAY_SIZE" | "ARRAY_LENGTH" | "CARDINALITY" | "SIZE" => "REPEATED_COUNT",
            // Array membership.
            "ARRAY_CONTAINS" | "CONTAINS" => "REPEATED_CONTAINS",
            "POWER" => "POW",
            "LEVENSHTEIN" => "LEVENSHTEIN_DISTANCE",
            "REGEXP_LIKE" | "RLIKE" => "REGEXP_MATCHES",
            // NOTE(review): only the name changes; the format-string argument
            // is forwarded as-is — confirm the pattern syntax is compatible.
            "DATE_FORMAT" | "STRFTIME" => "TO_CHAR",
            // NOTE(review): arguments keep their incoming order — confirm it
            // matches what STRPOS expects.
            "POSITION" => "STRPOS",

            // No rule for this name: hand the call back unchanged.
            _ => return Ok(Expression::Function(Box::new(f))),
        };

        // Only the arguments are carried over; any other state on the incoming
        // `Function` is whatever `Function::new` initializes it to.
        Ok(Expression::Function(Box::new(Function::new(
            drill_name.to_string(),
            f.args,
        ))))
    }
}