Skip to main content

polyglot_sql/dialects/
drill.rs

1//! Apache Drill SQL Dialect
2//!
3//! Drill-specific SQL dialect based on sqlglot patterns.
4//!
5//! Key characteristics:
6//! - Uses backticks for identifiers
7//! - Backslash string escapes
8//! - No TRY_CAST support (must use CAST)
9//! - NULLS LAST is default ordering
10//! - Functions: REPEATED_COUNT (array size), REPEATED_CONTAINS (array contains)
11//! - POW for power function
12//! - Date format: 'yyyy-MM-dd'
13//! - Type mappings: INT→INTEGER, TEXT→VARCHAR, etc.
14
15use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{Expression, Function};
18use crate::generator::{GeneratorConfig, NormalizeFunctions};
19use crate::tokens::TokenizerConfig;
20
/// Apache Drill dialect.
///
/// A field-less marker type: it carries no state of its own, and the
/// Drill-specific behavior lives in its `DialectImpl` implementation and
/// inherent methods. Because it has no fields, it is trivially copyable,
/// printable for diagnostics, and constructible through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct DrillDialect;
23
24impl DialectImpl for DrillDialect {
25    fn dialect_type(&self) -> DialectType {
26        DialectType::Drill
27    }
28
29    fn tokenizer_config(&self) -> TokenizerConfig {
30        let mut config = TokenizerConfig::default();
31        // Drill uses backticks for identifiers
32        config.identifiers.insert('`', '`');
33        config
34    }
35
36    fn generator_config(&self) -> GeneratorConfig {
37        use crate::generator::IdentifierQuoteStyle;
38        GeneratorConfig {
39            identifier_quote: '`',
40            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
41            dialect: Some(DialectType::Drill),
42            // Drill: NORMALIZE_FUNCTIONS = False, PRESERVE_ORIGINAL_NAMES = True
43            normalize_functions: NormalizeFunctions::None,
44            ..Default::default()
45        }
46    }
47
48    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
49        match expr {
50            // TRY_CAST → CAST in Drill (no TRY_CAST support)
51            Expression::TryCast(c) => Ok(Expression::Cast(c)),
52
53            // SafeCast → CAST in Drill
54            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
55
56            // CURRENT_TIMESTAMP without parentheses
57            Expression::CurrentTimestamp(_) => Ok(Expression::CurrentTimestamp(
58                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
59            )),
60
61            // ILIKE → `ILIKE` (backtick quoted function in Drill)
62            // Drill supports ILIKE but it needs to be backtick-quoted
63            Expression::ILike(op) => {
64                // Just pass through - Drill supports ILIKE
65                Ok(Expression::ILike(op))
66            }
67
68            // Power → POW in Drill
69            Expression::Power(op) => Ok(Expression::Function(Box::new(Function::new(
70                "POW".to_string(),
71                vec![op.this, op.expression],
72            )))),
73
74            // ArrayContains → REPEATED_CONTAINS in Drill
75            Expression::ArrayContains(f) => Ok(Expression::Function(Box::new(Function::new(
76                "REPEATED_CONTAINS".to_string(),
77                vec![f.this, f.expression],
78            )))),
79
80            // Generic function transformations
81            Expression::Function(f) => self.transform_function(*f),
82
83            // Pass through everything else
84            _ => Ok(expr),
85        }
86    }
87}
88
89impl DrillDialect {
90    fn transform_function(&self, f: Function) -> Result<Expression> {
91        let name_upper = f.name.to_uppercase();
92        match name_upper.as_str() {
93            // CURRENT_TIMESTAMP without parentheses
94            "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
95                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
96            )),
97
98            // ARRAY_SIZE / ARRAY_LENGTH → REPEATED_COUNT
99            "ARRAY_SIZE" | "ARRAY_LENGTH" | "CARDINALITY" | "SIZE" => {
100                Ok(Expression::Function(Box::new(Function::new(
101                    "REPEATED_COUNT".to_string(),
102                    f.args,
103                ))))
104            }
105
106            // ARRAY_CONTAINS → REPEATED_CONTAINS
107            "ARRAY_CONTAINS" | "CONTAINS" => Ok(Expression::Function(Box::new(Function::new(
108                "REPEATED_CONTAINS".to_string(),
109                f.args,
110            )))),
111
112            // POWER → POW
113            "POWER" => Ok(Expression::Function(Box::new(Function::new(
114                "POW".to_string(),
115                f.args,
116            )))),
117
118            // LEVENSHTEIN → LEVENSHTEIN_DISTANCE
119            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(Function::new(
120                "LEVENSHTEIN_DISTANCE".to_string(),
121                f.args,
122            )))),
123
124            // REGEXP_LIKE → REGEXP_MATCHES
125            "REGEXP_LIKE" | "RLIKE" => Ok(Expression::Function(Box::new(Function::new(
126                "REGEXP_MATCHES".to_string(),
127                f.args,
128            )))),
129
130            // TO_TIMESTAMP → TO_TIMESTAMP (native, but for parsing)
131            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
132
133            // TO_DATE → TO_DATE (native)
134            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
135
136            // DATE_FORMAT → TO_CHAR
137            "DATE_FORMAT" => Ok(Expression::Function(Box::new(Function::new(
138                "TO_CHAR".to_string(),
139                f.args,
140            )))),
141
142            // strftime → TO_CHAR
143            "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
144                "TO_CHAR".to_string(),
145                f.args,
146            )))),
147
148            // UNIX_TIMESTAMP → native
149            "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
150
151            // FROM_UNIXTIME → native (but named UNIX_TIMESTAMP_TO_TIMESTAMP in Drill)
152            "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
153
154            // DATE_ADD with interval support
155            "DATE_ADD" => Ok(Expression::Function(Box::new(f))),
156
157            // DATE_SUB with interval support
158            "DATE_SUB" => Ok(Expression::Function(Box::new(f))),
159
160            // STRPOS → STRPOS (native in Drill)
161            "STRPOS" => Ok(Expression::Function(Box::new(f))),
162
163            // POSITION → STRPOS
164            "POSITION" => Ok(Expression::Function(Box::new(Function::new(
165                "STRPOS".to_string(),
166                f.args,
167            )))),
168
169            // Pass through everything else
170            _ => Ok(Expression::Function(Box::new(f))),
171        }
172    }
173}