1use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{
18 BinaryOp, CeilFunc, DateTimeField, Expression, ExtractFunc, Function, LikeOp, Literal, Paren,
19 UnaryFunc, VarArgFunc,
20};
21use crate::generator::GeneratorConfig;
22use crate::tokens::TokenizerConfig;
23
/// Hive SQL dialect.
///
/// A stateless marker type: its [`DialectImpl`] implementation reports
/// [`DialectType::Hive`], supplies the tokenizer and generator configuration,
/// and rewrites expressions through `transform_expr`.
pub struct HiveDialect;
26
27impl DialectImpl for HiveDialect {
28 fn dialect_type(&self) -> DialectType {
29 DialectType::Hive
30 }
31
32 fn tokenizer_config(&self) -> TokenizerConfig {
33 let mut config = TokenizerConfig::default();
34 config.identifiers.clear();
36 config.identifiers.insert('`', '`');
37 config.quotes.insert("\"".to_string(), "\"".to_string());
39 config.string_escapes.push('\\');
41 config
43 .keywords
44 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
45 config
46 .keywords
47 .insert("REPAIR".to_string(), crate::tokens::TokenType::Command);
48 config
49 .keywords
50 .insert("MSCK".to_string(), crate::tokens::TokenType::Command);
51 config
53 .numeric_literals
54 .insert("L".to_string(), "BIGINT".to_string());
55 config
56 .numeric_literals
57 .insert("S".to_string(), "SMALLINT".to_string());
58 config
59 .numeric_literals
60 .insert("Y".to_string(), "TINYINT".to_string());
61 config
62 .numeric_literals
63 .insert("D".to_string(), "DOUBLE".to_string());
64 config
65 .numeric_literals
66 .insert("F".to_string(), "FLOAT".to_string());
67 config
68 .numeric_literals
69 .insert("BD".to_string(), "DECIMAL".to_string());
70 config.identifiers_can_start_with_digit = true;
72 config
73 }
74
75 fn generator_config(&self) -> GeneratorConfig {
76 use crate::generator::IdentifierQuoteStyle;
77 GeneratorConfig {
78 identifier_quote: '`',
79 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
80 dialect: Some(DialectType::Hive),
81 struct_field_sep: ": ",
83 alias_post_tablesample: true,
85 join_hints: false,
86 identifiers_can_start_with_digit: true,
87 schema_comment_with_eq: false,
89 ..Default::default()
90 }
91 }
92
93 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
94 match expr {
95 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
97 original_name: None,
98 expressions: vec![f.this, f.expression],
99 inferred_type: None,
100 }))),
101
102 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
104 original_name: None,
105 expressions: vec![f.this, f.expression],
106 inferred_type: None,
107 }))),
108
109 Expression::SafeCast(c) => Ok(Expression::Cast(c)),
114
115 Expression::ILike(op) => {
117 let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
118 let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
119 Ok(Expression::Like(Box::new(LikeOp {
120 left: lower_left,
121 right: lower_right,
122 escape: op.escape,
123 quantifier: op.quantifier,
124 inferred_type: None,
125 })))
126 }
127
128 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
130
131 Expression::Explode(f) => Ok(Expression::Explode(f)),
133
134 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
136
137 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
139 seed: None,
140 lower: None,
141 upper: None,
142 }))),
143
144 Expression::Rand(r) => Ok(Expression::Rand(r)),
146
147 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
149 "CONCAT".to_string(),
150 vec![op.left, op.right],
151 )))),
152
153 Expression::Function(f) => self.transform_function(*f),
155
156 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
158
159 _ => Ok(expr),
161 }
162 }
163}
164
165impl HiveDialect {
166 fn transform_function(&self, f: Function) -> Result<Expression> {
167 let name_upper = f.name.to_uppercase();
168 match name_upper.as_str() {
169 "LOG" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
171 "LN".to_string(),
172 f.args,
173 )))),
174
175 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
177 original_name: None,
178 expressions: f.args,
179 inferred_type: None,
180 }))),
181
182 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
184 original_name: None,
185 expressions: f.args,
186 inferred_type: None,
187 }))),
188
189 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
191 original_name: None,
192 expressions: f.args,
193 inferred_type: None,
194 }))),
195
196 "GROUP_CONCAT" if !f.args.is_empty() => {
199 Ok(Expression::Function(Box::new(Function::new(
202 "COLLECT_LIST".to_string(),
203 f.args,
204 ))))
205 }
206
207 "STRING_AGG" if !f.args.is_empty() => {
209 Ok(Expression::Function(Box::new(Function::new(
211 "COLLECT_LIST".to_string(),
212 f.args,
213 ))))
214 }
215
216 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
218 "COLLECT_LIST".to_string(),
219 f.args,
220 )))),
221
222 "SUBSTRING" | "SUBSTR" => Ok(Expression::Function(Box::new(f))),
224
225 "LENGTH" => Ok(Expression::Function(Box::new(f))),
227
228 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
230 f.args.into_iter().next().unwrap(),
231 )))),
232
233 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
235 seed: None,
236 lower: None,
237 upper: None,
238 }))),
239
240 "RAND" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
242 seed: None,
243 lower: None,
244 upper: None,
245 }))),
246
247 "NOW" => Ok(Expression::CurrentTimestamp(
249 crate::expressions::CurrentTimestamp {
250 precision: None,
251 sysdate: false,
252 },
253 )),
254
255 "GETDATE" => Ok(Expression::CurrentTimestamp(
257 crate::expressions::CurrentTimestamp {
258 precision: None,
259 sysdate: false,
260 },
261 )),
262
263 "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
265 crate::expressions::CurrentTimestamp {
266 precision: None,
267 sysdate: false,
268 },
269 )),
270
271 "CURRENT_DATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
273
274 "TO_DATE" => Ok(Expression::Function(Box::new(f))),
276
277 "TO_TIMESTAMP" if f.args.len() == 1 => {
279 Ok(Expression::Function(Box::new(Function::new(
281 "CAST".to_string(),
282 f.args,
283 ))))
284 }
285
286 "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
288
289 "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
291 "DATE_FORMAT".to_string(),
292 f.args,
293 )))),
294
295 "TO_CHAR" => Ok(Expression::Function(Box::new(Function::new(
297 "DATE_FORMAT".to_string(),
298 f.args,
299 )))),
300
301 "DATE_TRUNC" => Ok(Expression::Function(Box::new(Function::new(
303 "TRUNC".to_string(),
304 f.args,
305 )))),
306
307 "TRUNC" => Ok(Expression::Function(Box::new(f))),
309
310 "EXTRACT" => Ok(Expression::Function(Box::new(f))),
312
313 "DATEPART" => Ok(Expression::Function(Box::new(Function::new(
315 "EXTRACT".to_string(),
316 f.args,
317 )))),
318
319 "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
321
322 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
324
325 "POSITION" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
327 "LOCATE".to_string(),
328 f.args,
329 )))),
330
331 "STRPOS" if f.args.len() == 2 => {
333 let mut args = f.args;
334 let first = args.remove(0);
335 let second = args.remove(0);
336 Ok(Expression::Function(Box::new(Function::new(
338 "LOCATE".to_string(),
339 vec![second, first],
340 ))))
341 }
342
343 "CHARINDEX" if f.args.len() >= 2 => {
345 let mut args = f.args;
346 let substring = args.remove(0);
347 let string = args.remove(0);
348 let mut locate_args = vec![substring, string];
350 if !args.is_empty() {
351 locate_args.push(args.remove(0));
352 }
353 Ok(Expression::Function(Box::new(Function::new(
354 "LOCATE".to_string(),
355 locate_args,
356 ))))
357 }
358
359 "INSTR" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
361 "LOCATE".to_string(),
362 f.args,
363 )))),
364
365 "LOCATE" => Ok(Expression::Function(Box::new(f))),
367
368 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
370 this: f.args.into_iter().next().unwrap(),
371 decimals: None,
372 to: None,
373 }))),
374
375 "CEIL" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
377 this: f.args.into_iter().next().unwrap(),
378 decimals: None,
379 to: None,
380 }))),
381
382 "UNNEST" => Ok(Expression::Function(Box::new(Function::new(
384 "EXPLODE".to_string(),
385 f.args,
386 )))),
387
388 "FLATTEN" => Ok(Expression::Function(Box::new(Function::new(
390 "EXPLODE".to_string(),
391 f.args,
392 )))),
393
394 "ARRAY_AGG" => Ok(Expression::Function(Box::new(Function::new(
396 "COLLECT_LIST".to_string(),
397 f.args,
398 )))),
399
400 "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
402
403 "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
405
406 "ARRAY_LENGTH" | "ARRAY_SIZE" | "CARDINALITY" => Ok(Expression::Function(Box::new(
408 Function::new("SIZE".to_string(), f.args),
409 ))),
410
411 "SIZE" => Ok(Expression::Function(Box::new(f))),
413
414 "SPLIT" => Ok(Expression::Function(Box::new(f))),
416
417 "REGEXP_REPLACE" if f.args.len() > 3 => {
419 let args = f.args[..3].to_vec();
420 Ok(Expression::Function(Box::new(Function::new(
421 "REGEXP_REPLACE".to_string(),
422 args,
423 ))))
424 }
425 "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
427
428 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
430 let subject = f.args[0].clone();
431 let pattern = f.args[1].clone();
432 let group = if f.args.len() >= 6 {
433 let g = &f.args[5];
434 if matches!(g, Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::Number(n) if n == "1"))
435 {
436 None
437 } else {
438 Some(g.clone())
439 }
440 } else {
441 None
442 };
443 let mut args = vec![subject, pattern];
444 if let Some(g) = group {
445 args.push(g);
446 }
447 Ok(Expression::Function(Box::new(Function::new(
448 "REGEXP_EXTRACT".to_string(),
449 args,
450 ))))
451 }
452
453 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(f))),
455
456 "RLIKE" | "REGEXP_LIKE" => Ok(Expression::Function(Box::new(Function::new(
458 "RLIKE".to_string(),
459 f.args,
460 )))),
461
462 "JSON_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
464 "GET_JSON_OBJECT".to_string(),
465 f.args,
466 )))),
467
468 "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(Function::new(
470 "GET_JSON_OBJECT".to_string(),
471 f.args,
472 )))),
473
474 "GET_JSON_OBJECT" => Ok(Expression::Function(Box::new(f))),
476
477 "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
480 "FROM_JSON".to_string(),
481 f.args,
482 )))),
483
484 "TO_JSON" => Ok(Expression::Function(Box::new(f))),
486
487 "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
489
490 "DATE_ADD" => Ok(Expression::Function(Box::new(f))),
492
493 "DATE_SUB" if f.args.len() == 2 => {
496 let mut args = f.args;
497 let date_arg = args.remove(0);
498 let interval_arg = args.remove(0);
499
500 let effective_interval = match &interval_arg {
502 Expression::Literal(_) | Expression::Column(_) | Expression::Identifier(_) => {
503 interval_arg
504 }
505 _ => Expression::Paren(Box::new(Paren {
506 this: interval_arg,
507 trailing_comments: Vec::new(),
508 })),
509 };
510
511 let negated_interval = Expression::Mul(Box::new(BinaryOp {
513 left: effective_interval,
514 right: Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
515 left_comments: Vec::new(),
516 operator_comments: Vec::new(),
517 trailing_comments: Vec::new(),
518 inferred_type: None,
519 }));
520
521 Ok(Expression::Function(Box::new(Function::new(
522 "DATE_ADD".to_string(),
523 vec![date_arg, negated_interval],
524 ))))
525 }
526
527 "ADD_MONTHS" => Ok(Expression::Function(Box::new(f))),
529
530 "MONTHS_BETWEEN" => Ok(Expression::Function(Box::new(f))),
532
533 "NVL" => Ok(Expression::Function(Box::new(f))),
535
536 "NVL2" => Ok(Expression::Function(Box::new(f))),
538
539 "MAP" => Ok(Expression::Function(Box::new(f))),
541
542 "ARRAY" => Ok(Expression::Function(Box::new(f))),
544
545 "STRUCT" => Ok(Expression::Function(Box::new(f))),
547
548 "NAMED_STRUCT" => Ok(Expression::Function(Box::new(f))),
550
551 "DATE_PART" if f.args.len() == 2 => {
553 let mut args = f.args;
554 let part = args.remove(0);
555 let expr = args.remove(0);
556 if let Some(field) = hive_expr_to_datetime_field(&part) {
557 Ok(Expression::Extract(Box::new(ExtractFunc {
558 this: expr,
559 field,
560 })))
561 } else {
562 Ok(Expression::Function(Box::new(Function::new(
563 "DATE_PART".to_string(),
564 vec![part, expr],
565 ))))
566 }
567 }
568
569 _ => Ok(Expression::Function(Box::new(f))),
571 }
572 }
573
574 fn transform_aggregate_function(
575 &self,
576 f: Box<crate::expressions::AggregateFunction>,
577 ) -> Result<Expression> {
578 let name_upper = f.name.to_uppercase();
579 match name_upper.as_str() {
580 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
582 Function::new("COLLECT_LIST".to_string(), f.args),
583 ))),
584
585 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
587 Function::new("COLLECT_LIST".to_string(), f.args),
588 ))),
589
590 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
592 "COLLECT_LIST".to_string(),
593 f.args,
594 )))),
595
596 "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
598 "COLLECT_LIST".to_string(),
599 f.args,
600 )))),
601
602 _ => Ok(Expression::AggregateFunction(f)),
604 }
605 }
606}
607
608fn hive_expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
610 let name = match expr {
611 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
612 let Literal::String(s) = lit.as_ref() else {
613 unreachable!()
614 };
615 s.to_uppercase()
616 }
617 Expression::Identifier(id) => id.name.to_uppercase(),
618 Expression::Var(v) => v.this.to_uppercase(),
619 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
620 _ => return None,
621 };
622 match name.as_str() {
623 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
624 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
625 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
626 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
627 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
628 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
629 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
630 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
631 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
632 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
633 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
634 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
635 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
636 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
637 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
638 _ => Some(DateTimeField::Custom(name)),
639 }
640}