1use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{BinaryOp, CeilFunc, DateTimeField, Expression, ExtractFunc, Function, LikeOp, Literal, Paren, UnaryFunc, VarArgFunc};
18use crate::generator::GeneratorConfig;
19use crate::tokens::TokenizerConfig;
20
21pub struct HiveDialect;
23
24impl DialectImpl for HiveDialect {
25 fn dialect_type(&self) -> DialectType {
26 DialectType::Hive
27 }
28
29 fn tokenizer_config(&self) -> TokenizerConfig {
30 let mut config = TokenizerConfig::default();
31 config.identifiers.clear();
33 config.identifiers.insert('`', '`');
34 config.quotes.insert("\"".to_string(), "\"".to_string());
36 config.string_escapes.push('\\');
38 config.keywords.insert("DIV".to_string(), crate::tokens::TokenType::Div);
40 config.numeric_literals.insert("L".to_string(), "BIGINT".to_string());
42 config.numeric_literals.insert("S".to_string(), "SMALLINT".to_string());
43 config.numeric_literals.insert("Y".to_string(), "TINYINT".to_string());
44 config.numeric_literals.insert("D".to_string(), "DOUBLE".to_string());
45 config.numeric_literals.insert("F".to_string(), "FLOAT".to_string());
46 config.numeric_literals.insert("BD".to_string(), "DECIMAL".to_string());
47 config.identifiers_can_start_with_digit = true;
49 config
50 }
51
52 fn generator_config(&self) -> GeneratorConfig {
53 use crate::generator::IdentifierQuoteStyle;
54 GeneratorConfig {
55 identifier_quote: '`',
56 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
57 dialect: Some(DialectType::Hive),
58 struct_field_sep: ": ",
60 alias_post_tablesample: true,
62 join_hints: false,
63 identifiers_can_start_with_digit: true,
64 schema_comment_with_eq: false,
66 ..Default::default()
67 }
68 }
69
70 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
71 match expr {
72 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
74 expressions: vec![f.this, f.expression],
75 }))),
76
77 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
79 expressions: vec![f.this, f.expression],
80 }))),
81
82 Expression::SafeCast(c) => Ok(Expression::Cast(c)),
87
88 Expression::ILike(op) => {
90 let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
91 let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
92 Ok(Expression::Like(Box::new(LikeOp {
93 left: lower_left,
94 right: lower_right,
95 escape: op.escape,
96 quantifier: op.quantifier,
97 })))
98 }
99
100 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
102
103 Expression::Explode(f) => Ok(Expression::Explode(f)),
105
106 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
108
109 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
111 seed: None, lower: None, upper: None,
112 }))),
113
114 Expression::Rand(r) => Ok(Expression::Rand(r)),
116
117 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
119 "CONCAT".to_string(),
120 vec![op.left, op.right],
121 )))),
122
123 Expression::Function(f) => self.transform_function(*f),
125
126 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
128
129 _ => Ok(expr),
131 }
132 }
133}
134
135impl HiveDialect {
136 fn transform_function(&self, f: Function) -> Result<Expression> {
137 let name_upper = f.name.to_uppercase();
138 match name_upper.as_str() {
139 "LOG" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
141 "LN".to_string(),
142 f.args,
143 )))),
144
145 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
147 expressions: f.args,
148 }))),
149
150 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
152 expressions: f.args,
153 }))),
154
155 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
157 expressions: f.args,
158 }))),
159
160 "GROUP_CONCAT" if !f.args.is_empty() => {
163 Ok(Expression::Function(Box::new(Function::new(
166 "COLLECT_LIST".to_string(),
167 f.args,
168 ))))
169 }
170
171 "STRING_AGG" if !f.args.is_empty() => {
173 Ok(Expression::Function(Box::new(Function::new(
175 "COLLECT_LIST".to_string(),
176 f.args,
177 ))))
178 }
179
180 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
182 "COLLECT_LIST".to_string(),
183 f.args,
184 )))),
185
186 "SUBSTRING" | "SUBSTR" => Ok(Expression::Function(Box::new(f))),
188
189 "LENGTH" => Ok(Expression::Function(Box::new(f))),
191
192 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
194 f.args.into_iter().next().unwrap(),
195 )))),
196
197 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
199 seed: None, lower: None, upper: None,
200 }))),
201
202 "RAND" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
204 seed: None, lower: None, upper: None,
205 }))),
206
207 "NOW" => Ok(Expression::CurrentTimestamp(
209 crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
210 )),
211
212 "GETDATE" => Ok(Expression::CurrentTimestamp(
214 crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
215 )),
216
217 "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
219 crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
220 )),
221
222 "CURRENT_DATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
224
225 "TO_DATE" => Ok(Expression::Function(Box::new(f))),
227
228 "TO_TIMESTAMP" if f.args.len() == 1 => {
230 Ok(Expression::Function(Box::new(Function::new(
232 "CAST".to_string(),
233 f.args,
234 ))))
235 }
236
237 "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
239
240 "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
242 "DATE_FORMAT".to_string(),
243 f.args,
244 )))),
245
246 "TO_CHAR" => Ok(Expression::Function(Box::new(Function::new(
248 "DATE_FORMAT".to_string(),
249 f.args,
250 )))),
251
252 "DATE_TRUNC" => Ok(Expression::Function(Box::new(Function::new(
254 "TRUNC".to_string(),
255 f.args,
256 )))),
257
258 "TRUNC" => Ok(Expression::Function(Box::new(f))),
260
261 "EXTRACT" => Ok(Expression::Function(Box::new(f))),
263
264 "DATEPART" => Ok(Expression::Function(Box::new(Function::new(
266 "EXTRACT".to_string(),
267 f.args,
268 )))),
269
270 "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
272
273 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
275
276 "POSITION" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
278 "LOCATE".to_string(),
279 f.args,
280 )))),
281
282 "STRPOS" if f.args.len() == 2 => {
284 let mut args = f.args;
285 let first = args.remove(0);
286 let second = args.remove(0);
287 Ok(Expression::Function(Box::new(Function::new(
289 "LOCATE".to_string(),
290 vec![second, first],
291 ))))
292 }
293
294 "CHARINDEX" if f.args.len() >= 2 => {
296 let mut args = f.args;
297 let substring = args.remove(0);
298 let string = args.remove(0);
299 let mut locate_args = vec![substring, string];
301 if !args.is_empty() {
302 locate_args.push(args.remove(0));
303 }
304 Ok(Expression::Function(Box::new(Function::new(
305 "LOCATE".to_string(),
306 locate_args,
307 ))))
308 }
309
310 "INSTR" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
312 "LOCATE".to_string(),
313 f.args,
314 )))),
315
316 "LOCATE" => Ok(Expression::Function(Box::new(f))),
318
319 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
321 this: f.args.into_iter().next().unwrap(),
322 decimals: None,
323 to: None,
324 }))),
325
326 "CEIL" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
328 this: f.args.into_iter().next().unwrap(),
329 decimals: None,
330 to: None,
331 }))),
332
333 "UNNEST" => Ok(Expression::Function(Box::new(Function::new(
335 "EXPLODE".to_string(),
336 f.args,
337 )))),
338
339 "FLATTEN" => Ok(Expression::Function(Box::new(Function::new(
341 "EXPLODE".to_string(),
342 f.args,
343 )))),
344
345 "ARRAY_AGG" => Ok(Expression::Function(Box::new(Function::new(
347 "COLLECT_LIST".to_string(),
348 f.args,
349 )))),
350
351 "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
353
354 "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
356
357 "ARRAY_LENGTH" | "ARRAY_SIZE" | "CARDINALITY" => {
359 Ok(Expression::Function(Box::new(Function::new(
360 "SIZE".to_string(),
361 f.args,
362 ))))
363 }
364
365 "SIZE" => Ok(Expression::Function(Box::new(f))),
367
368 "SPLIT" => Ok(Expression::Function(Box::new(f))),
370
371 "REGEXP_REPLACE" if f.args.len() > 3 => {
373 let args = f.args[..3].to_vec();
374 Ok(Expression::Function(Box::new(Function::new("REGEXP_REPLACE".to_string(), args))))
375 }
376 "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
378
379 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
381 let subject = f.args[0].clone();
382 let pattern = f.args[1].clone();
383 let group = if f.args.len() >= 6 {
384 let g = &f.args[5];
385 if matches!(g, Expression::Literal(crate::expressions::Literal::Number(n)) if n == "1") {
386 None
387 } else {
388 Some(g.clone())
389 }
390 } else {
391 None
392 };
393 let mut args = vec![subject, pattern];
394 if let Some(g) = group {
395 args.push(g);
396 }
397 Ok(Expression::Function(Box::new(Function::new("REGEXP_EXTRACT".to_string(), args))))
398 }
399
400 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(f))),
402
403 "RLIKE" | "REGEXP_LIKE" => Ok(Expression::Function(Box::new(Function::new(
405 "RLIKE".to_string(),
406 f.args,
407 )))),
408
409 "JSON_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
411 "GET_JSON_OBJECT".to_string(),
412 f.args,
413 )))),
414
415 "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(Function::new(
417 "GET_JSON_OBJECT".to_string(),
418 f.args,
419 )))),
420
421 "GET_JSON_OBJECT" => Ok(Expression::Function(Box::new(f))),
423
424 "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
427 "FROM_JSON".to_string(),
428 f.args,
429 )))),
430
431 "TO_JSON" => Ok(Expression::Function(Box::new(f))),
433
434 "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
436
437 "DATE_ADD" => Ok(Expression::Function(Box::new(f))),
439
440 "DATE_SUB" if f.args.len() == 2 => {
443 let mut args = f.args;
444 let date_arg = args.remove(0);
445 let interval_arg = args.remove(0);
446
447 let effective_interval = match &interval_arg {
449 Expression::Literal(_) | Expression::Column(_) | Expression::Identifier(_) => interval_arg,
450 _ => Expression::Paren(Box::new(Paren {
451 this: interval_arg,
452 trailing_comments: Vec::new(),
453 })),
454 };
455
456 let negated_interval = Expression::Mul(Box::new(BinaryOp {
458 left: effective_interval,
459 right: Expression::Literal(Literal::Number("-1".to_string())),
460 left_comments: Vec::new(),
461 operator_comments: Vec::new(),
462 trailing_comments: Vec::new(),
463 }));
464
465 Ok(Expression::Function(Box::new(Function::new(
466 "DATE_ADD".to_string(),
467 vec![date_arg, negated_interval],
468 ))))
469 }
470
471 "ADD_MONTHS" => Ok(Expression::Function(Box::new(f))),
473
474 "MONTHS_BETWEEN" => Ok(Expression::Function(Box::new(f))),
476
477 "NVL" => Ok(Expression::Function(Box::new(f))),
479
480 "NVL2" => Ok(Expression::Function(Box::new(f))),
482
483 "MAP" => Ok(Expression::Function(Box::new(f))),
485
486 "ARRAY" => Ok(Expression::Function(Box::new(f))),
488
489 "STRUCT" => Ok(Expression::Function(Box::new(f))),
491
492 "NAMED_STRUCT" => Ok(Expression::Function(Box::new(f))),
494
495 "DATE_PART" if f.args.len() == 2 => {
497 let mut args = f.args;
498 let part = args.remove(0);
499 let expr = args.remove(0);
500 if let Some(field) = hive_expr_to_datetime_field(&part) {
501 Ok(Expression::Extract(Box::new(ExtractFunc {
502 this: expr,
503 field,
504 })))
505 } else {
506 Ok(Expression::Function(Box::new(Function::new(
507 "DATE_PART".to_string(),
508 vec![part, expr],
509 ))))
510 }
511 }
512
513 _ => Ok(Expression::Function(Box::new(f))),
515 }
516 }
517
518 fn transform_aggregate_function(
519 &self,
520 f: Box<crate::expressions::AggregateFunction>,
521 ) -> Result<Expression> {
522 let name_upper = f.name.to_uppercase();
523 match name_upper.as_str() {
524 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
526 Function::new("COLLECT_LIST".to_string(), f.args),
527 ))),
528
529 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
531 "COLLECT_LIST".to_string(),
532 f.args,
533 )))),
534
535 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
537 "COLLECT_LIST".to_string(),
538 f.args,
539 )))),
540
541 "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
543 "COLLECT_LIST".to_string(),
544 f.args,
545 )))),
546
547 _ => Ok(Expression::AggregateFunction(f)),
549 }
550 }
551}
552
553fn hive_expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
555 let name = match expr {
556 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
557 Expression::Identifier(id) => id.name.to_uppercase(),
558 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
559 _ => return None,
560 };
561 match name.as_str() {
562 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
563 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
564 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
565 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
566 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
567 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
568 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
569 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
570 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
571 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
572 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
573 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
574 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
575 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
576 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
577 _ => Some(DateTimeField::Custom(name)),
578 }
579}