Skip to main content

polyglot_sql/dialects/
drill.rs

1//! Apache Drill SQL Dialect
2//!
3//! Drill-specific SQL dialect based on sqlglot patterns.
4//!
5//! Key characteristics:
6//! - Uses backticks for identifiers
7//! - Backslash string escapes
8//! - No TRY_CAST support (must use CAST)
9//! - NULLS LAST is default ordering
10//! - Functions: REPEATED_COUNT (array size), REPEATED_CONTAINS (array contains)
11//! - POW for power function
12//! - Date format: 'yyyy-MM-dd'
13//! - Type mappings: INT→INTEGER, TEXT→VARCHAR, etc.
14
15use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{Expression, Function};
18use crate::generator::{GeneratorConfig, NormalizeFunctions};
19use crate::tokens::TokenizerConfig;
20
/// Apache Drill dialect.
///
/// Field-less unit struct. Its `DialectImpl` implementation in this file
/// provides the tokenizer configuration, the generator configuration and the
/// expression rewrites applied for Drill.
pub struct DrillDialect;
23
24impl DialectImpl for DrillDialect {
25    fn dialect_type(&self) -> DialectType {
26        DialectType::Drill
27    }
28
29    fn tokenizer_config(&self) -> TokenizerConfig {
30        let mut config = TokenizerConfig::default();
31        // Drill uses backticks for identifiers
32        config.identifiers.insert('`', '`');
33        config
34    }
35
36    fn generator_config(&self) -> GeneratorConfig {
37        use crate::generator::IdentifierQuoteStyle;
38        GeneratorConfig {
39            identifier_quote: '`',
40            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
41            dialect: Some(DialectType::Drill),
42            // Drill: NORMALIZE_FUNCTIONS = False, PRESERVE_ORIGINAL_NAMES = True
43            normalize_functions: NormalizeFunctions::None,
44            ..Default::default()
45        }
46    }
47
48    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
49        match expr {
50            // TRY_CAST → CAST in Drill (no TRY_CAST support)
51            Expression::TryCast(c) => Ok(Expression::Cast(c)),
52
53            // SafeCast → CAST in Drill
54            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
55
56            // CURRENT_TIMESTAMP without parentheses
57            Expression::CurrentTimestamp(_) => Ok(Expression::CurrentTimestamp(
58                crate::expressions::CurrentTimestamp {
59                    precision: None,
60                    sysdate: false,
61                },
62            )),
63
64            // ILIKE → `ILIKE` (backtick quoted function in Drill)
65            // Drill supports ILIKE but it needs to be backtick-quoted
66            Expression::ILike(op) => {
67                // Just pass through - Drill supports ILIKE
68                Ok(Expression::ILike(op))
69            }
70
71            // Power → POW in Drill
72            Expression::Power(op) => Ok(Expression::Function(Box::new(Function::new(
73                "POW".to_string(),
74                vec![op.this, op.expression],
75            )))),
76
77            // ArrayContains → REPEATED_CONTAINS in Drill
78            Expression::ArrayContains(f) => Ok(Expression::Function(Box::new(Function::new(
79                "REPEATED_CONTAINS".to_string(),
80                vec![f.this, f.expression],
81            )))),
82
83            // Generic function transformations
84            Expression::Function(f) => self.transform_function(*f),
85
86            // Pass through everything else
87            _ => Ok(expr),
88        }
89    }
90}
91
92impl DrillDialect {
93    fn transform_function(&self, f: Function) -> Result<Expression> {
94        let name_upper = f.name.to_uppercase();
95        match name_upper.as_str() {
96            // CURRENT_TIMESTAMP without parentheses
97            "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
98                crate::expressions::CurrentTimestamp {
99                    precision: None,
100                    sysdate: false,
101                },
102            )),
103
104            // ARRAY_SIZE / ARRAY_LENGTH → REPEATED_COUNT
105            "ARRAY_SIZE" | "ARRAY_LENGTH" | "CARDINALITY" | "SIZE" => Ok(Expression::Function(
106                Box::new(Function::new("REPEATED_COUNT".to_string(), f.args)),
107            )),
108
109            // ARRAY_CONTAINS → REPEATED_CONTAINS
110            "ARRAY_CONTAINS" | "CONTAINS" => Ok(Expression::Function(Box::new(Function::new(
111                "REPEATED_CONTAINS".to_string(),
112                f.args,
113            )))),
114
115            // POWER → POW
116            "POWER" => Ok(Expression::Function(Box::new(Function::new(
117                "POW".to_string(),
118                f.args,
119            )))),
120
121            // LEVENSHTEIN → LEVENSHTEIN_DISTANCE
122            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
123                "LEVENSHTEIN_DISTANCE".to_string(),
124                f.args,
125            )))),
126
127            // REGEXP_LIKE → REGEXP_MATCHES
128            "REGEXP_LIKE" | "RLIKE" => Ok(Expression::Function(Box::new(Function::new(
129                "REGEXP_MATCHES".to_string(),
130                f.args,
131            )))),
132
133            // TO_TIMESTAMP → TO_TIMESTAMP (native, but for parsing)
134            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
135
136            // TO_DATE → TO_DATE (native)
137            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
138
139            // DATE_FORMAT → TO_CHAR
140            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
141                "TO_CHAR".to_string(),
142                f.args,
143            )))),
144
145            // strftime → TO_CHAR
146            "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
147                "TO_CHAR".to_string(),
148                f.args,
149            )))),
150
151            // UNIX_TIMESTAMP → native
152            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
153
154            // FROM_UNIXTIME → native (but named UNIX_TIMESTAMP_TO_TIMESTAMP in Drill)
155            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
156
157            // DATE_ADD with interval support
158            "DATE_ADD" => Ok(Expression::Function(Box::new(f))),
159
160            // DATE_SUB with interval support
161            "DATE_SUB" => Ok(Expression::Function(Box::new(f))),
162
163            // STRPOS → STRPOS (native in Drill)
164            "STRPOS" => Ok(Expression::Function(Box::new(f))),
165
166            // POSITION → STRPOS
167            "POSITION" => Ok(Expression::Function(Box::new(Function::new(
168                "STRPOS".to_string(),
169                f.args,
170            )))),
171
172            // Pass through everything else
173            _ => Ok(Expression::Function(Box::new(f))),
174        }
175    }
176}