1use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{
18 BinaryOp, CeilFunc, DateTimeField, Expression, ExtractFunc, Function, LikeOp, Literal, Paren,
19 UnaryFunc, VarArgFunc,
20};
21#[cfg(feature = "generate")]
22use crate::generator::GeneratorConfig;
23use crate::tokens::TokenizerConfig;
24
/// Unit type standing for the Hive SQL dialect.
///
/// It carries no state; its `DialectImpl` implementation reports `DialectType::Hive`
/// and supplies the Hive tokenizer settings.
pub struct HiveDialect;
27
28impl DialectImpl for HiveDialect {
29 fn dialect_type(&self) -> DialectType {
30 DialectType::Hive
31 }
32
33 fn tokenizer_config(&self) -> TokenizerConfig {
34 let mut config = TokenizerConfig::default();
35 config.identifiers.clear();
37 config.identifiers.insert('`', '`');
38 config.quotes.insert("\"".to_string(), "\"".to_string());
40 config.string_escapes.push('\\');
42 config
44 .keywords
45 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
46 config
47 .keywords
48 .insert("REPAIR".to_string(), crate::tokens::TokenType::Command);
49 config
50 .keywords
51 .insert("MSCK".to_string(), crate::tokens::TokenType::Command);
52 config
54 .numeric_literals
55 .insert("L".to_string(), "BIGINT".to_string());
56 config
57 .numeric_literals
58 .insert("S".to_string(), "SMALLINT".to_string());
59 config
60 .numeric_literals
61 .insert("Y".to_string(), "TINYINT".to_string());
62 config
63 .numeric_literals
64 .insert("D".to_string(), "DOUBLE".to_string());
65 config
66 .numeric_literals
67 .insert("F".to_string(), "FLOAT".to_string());
68 config
69 .numeric_literals
70 .insert("BD".to_string(), "DECIMAL".to_string());
71 config.identifiers_can_start_with_digit = true;
73 config
74 }
75
76 #[cfg(feature = "generate")]
77
78 fn generator_config(&self) -> GeneratorConfig {
79 use crate::generator::IdentifierQuoteStyle;
80 GeneratorConfig {
81 identifier_quote: '`',
82 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
83 dialect: Some(DialectType::Hive),
84 struct_field_sep: ": ",
86 alias_post_tablesample: true,
88 join_hints: false,
89 identifiers_can_start_with_digit: true,
90 schema_comment_with_eq: false,
92 ..Default::default()
93 }
94 }
95
96 #[cfg(feature = "transpile")]
97
98 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
99 match expr {
100 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
102 original_name: None,
103 expressions: vec![f.this, f.expression],
104 inferred_type: None,
105 }))),
106
107 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
109 original_name: None,
110 expressions: vec![f.this, f.expression],
111 inferred_type: None,
112 }))),
113
114 Expression::SafeCast(c) => Ok(Expression::Cast(c)),
119
120 Expression::ILike(op) => {
122 let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
123 let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
124 Ok(Expression::Like(Box::new(LikeOp {
125 left: lower_left,
126 right: lower_right,
127 escape: op.escape,
128 quantifier: op.quantifier,
129 inferred_type: None,
130 })))
131 }
132
133 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
135
136 Expression::Explode(f) => Ok(Expression::Explode(f)),
138
139 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
141
142 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
144 seed: None,
145 lower: None,
146 upper: None,
147 }))),
148
149 Expression::Rand(r) => Ok(Expression::Rand(r)),
151
152 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
154 "CONCAT".to_string(),
155 vec![op.left, op.right],
156 )))),
157
158 Expression::Function(f) => self.transform_function(*f),
160
161 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
163
164 _ => Ok(expr),
166 }
167 }
168}
169
#[cfg(feature = "transpile")]
impl HiveDialect {
    /// Rewrites a generic function call into the form used for Hive.
    ///
    /// Dispatch is on the upper-cased function name; several rewrites additionally
    /// require a specific argument count. A call that matches no rewrite (including a
    /// known name used with a different argument count) is returned unchanged, which is
    /// also how functions that need no translation (`SUBSTR`, `LENGTH`, `TO_DATE`,
    /// `DATE_FORMAT`, `TRUNC`, `EXTRACT`, `LOCATE`, `COLLECT_LIST`, `SIZE`, `SPLIT`,
    /// `REGEXP_EXTRACT`, `GET_JSON_OBJECT`, `DATE_ADD`, `NVL2`, `MAP`, `STRUCT`, ...)
    /// pass through.
    ///
    /// Renamed calls are rebuilt with `Function::new`, so they carry only the new name
    /// and the argument list.
    ///
    /// This function never returns `Err`.
    fn transform_function(&self, f: Function) -> Result<Expression> {
        /// Builds a fresh call to `name` over `args`.
        fn call(name: &str, args: Vec<Expression>) -> Expression {
            Expression::Function(Box::new(Function::new(name.to_string(), args)))
        }

        let name_upper = f.name.to_uppercase();
        let arg_count = f.args.len();

        let rewritten = match name_upper.as_str() {
            // Single-argument LOG is emitted as LN.
            "LOG" if arg_count == 1 => call("LN", f.args),

            // Two-argument null fallbacks become COALESCE.
            "IFNULL" | "NVL" | "ISNULL" if arg_count == 2 => {
                Expression::Coalesce(Box::new(VarArgFunc {
                    original_name: None,
                    expressions: f.args,
                    inferred_type: None,
                }))
            }

            // String/array aggregations are all expressed through COLLECT_LIST.
            "GROUP_CONCAT" | "STRING_AGG" | "LISTAGG" if arg_count > 0 => {
                call("COLLECT_LIST", f.args)
            }
            "ARRAY_AGG" => call("COLLECT_LIST", f.args),

            "LEN" if arg_count == 1 => {
                let only = f
                    .args
                    .into_iter()
                    .next()
                    .expect("guard ensures exactly one argument");
                Expression::Length(Box::new(UnaryFunc::new(only)))
            }

            // NOTE(review): any arguments (e.g. a seed passed to RAND) are discarded
            // here — confirm that is intended.
            "RANDOM" | "RAND" => Expression::Rand(Box::new(crate::expressions::Rand {
                seed: None,
                lower: None,
                upper: None,
            })),

            "NOW" | "GETDATE" | "CURRENT_TIMESTAMP" => {
                Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
                    precision: None,
                    sysdate: false,
                })
            }

            "CURRENT_DATE" => Expression::CurrentDate(crate::expressions::CurrentDate),

            // NOTE(review): this produces a plain one-argument call named CAST with no
            // target type — confirm the generator renders it as intended.
            "TO_TIMESTAMP" if arg_count == 1 => call("CAST", f.args),

            "STRFTIME" | "TO_CHAR" => call("DATE_FORMAT", f.args),

            // NOTE(review): arguments are forwarded in their original order; verify this
            // matches the argument order Hive's TRUNC expects.
            "DATE_TRUNC" => call("TRUNC", f.args),

            "DATEPART" => call("EXTRACT", f.args),

            "POSITION" if arg_count == 2 => call("LOCATE", f.args),

            // STRPOS(str, substr) and INSTR(str, substr) take the searched string first,
            // while LOCATE(substr, str) takes the substring first, so the pair is swapped.
            "STRPOS" | "INSTR" if arg_count == 2 => {
                let mut args = f.args;
                args.swap(0, 1);
                call("LOCATE", args)
            }

            // CHARINDEX(substr, str[, start]) already matches LOCATE's order; anything
            // past the third argument is dropped.
            "CHARINDEX" if arg_count >= 2 => {
                let mut args = f.args;
                args.truncate(3);
                call("LOCATE", args)
            }

            "CEILING" | "CEIL" if arg_count == 1 => {
                let only = f
                    .args
                    .into_iter()
                    .next()
                    .expect("guard ensures exactly one argument");
                Expression::Ceil(Box::new(CeilFunc {
                    this: only,
                    decimals: None,
                    to: None,
                }))
            }

            "UNNEST" | "FLATTEN" => call("EXPLODE", f.args),

            "ARRAY_LENGTH" | "ARRAY_SIZE" | "CARDINALITY" => call("SIZE", f.args),

            // Only (subject, pattern, replacement) are kept; extra arguments are dropped.
            "REGEXP_REPLACE" if arg_count > 3 => {
                let mut args = f.args;
                args.truncate(3);
                call("REGEXP_REPLACE", args)
            }

            // REGEXP_SUBSTR -> REGEXP_EXTRACT(subject, pattern[, group]). The group is
            // read from the sixth argument and omitted when it is the literal number 1.
            "REGEXP_SUBSTR" if arg_count >= 2 => {
                let mut args = f.args;
                let group = if arg_count >= 6 {
                    // Index 5 lies beyond the two leading arguments kept below, so
                    // swapping the last element into its place does not disturb them.
                    Some(args.swap_remove(5))
                } else {
                    None
                };
                args.truncate(2);

                let is_literal_one = |g: &Expression| {
                    matches!(
                        g,
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::Number(n) if n == "1")
                    )
                };
                if let Some(g) = group.filter(|g| !is_literal_one(g)) {
                    args.push(g);
                }
                call("REGEXP_EXTRACT", args)
            }

            "RLIKE" | "REGEXP_LIKE" => call("RLIKE", f.args),

            "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" => call("GET_JSON_OBJECT", f.args),

            "PARSE_JSON" => call("FROM_JSON", f.args),

            // DATE_SUB(date, n) -> DATE_ADD(date, n * -1).
            "DATE_SUB" if arg_count == 2 => {
                let mut args = f.args;
                let date_arg = args.remove(0);
                let interval_arg = args.remove(0);

                // Literals, columns and identifiers are multiplied directly; any other
                // expression is parenthesized first so the multiplication applies to
                // the whole of it.
                let is_simple = matches!(
                    interval_arg,
                    Expression::Literal(_) | Expression::Column(_) | Expression::Identifier(_)
                );
                let operand = if is_simple {
                    interval_arg
                } else {
                    Expression::Paren(Box::new(Paren {
                        this: interval_arg,
                        trailing_comments: Vec::new(),
                    }))
                };

                let negated = Expression::Mul(Box::new(BinaryOp {
                    left: operand,
                    right: Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));

                call("DATE_ADD", vec![date_arg, negated])
            }

            // DATE_PART(part, expr) -> EXTRACT(part FROM expr) when the part designator
            // can be interpreted; otherwise the call is rebuilt as DATE_PART.
            "DATE_PART" if arg_count == 2 => {
                let mut args = f.args;
                let part = args.remove(0);
                let expr = args.remove(0);
                match hive_expr_to_datetime_field(&part) {
                    Some(field) => {
                        Expression::Extract(Box::new(ExtractFunc { this: expr, field }))
                    }
                    None => call("DATE_PART", vec![part, expr]),
                }
            }

            // No rewrite applies: keep the call exactly as it was.
            _ => Expression::Function(Box::new(f)),
        };

        Ok(rewritten)
    }

    /// Rewrites an aggregate function call into the form used for Hive.
    ///
    /// `GROUP_CONCAT`, `STRING_AGG`, `LISTAGG` and `ARRAY_AGG` (matched
    /// case-insensitively) with at least one argument become a plain `COLLECT_LIST`
    /// function call; only the argument list is carried over into the new call. Any
    /// other aggregate is returned unchanged.
    ///
    /// This function never returns `Err`.
    fn transform_aggregate_function(
        &self,
        f: Box<crate::expressions::AggregateFunction>,
    ) -> Result<Expression> {
        let name_upper = f.name.to_uppercase();
        let has_args = !f.args.is_empty();

        let rewritten = match name_upper.as_str() {
            "GROUP_CONCAT" | "STRING_AGG" | "LISTAGG" | "ARRAY_AGG" if has_args => {
                Expression::Function(Box::new(Function::new(
                    "COLLECT_LIST".to_string(),
                    f.args,
                )))
            }
            _ => Expression::AggregateFunction(f),
        };

        Ok(rewritten)
    }
}
613
614fn hive_expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
616 let name = match expr {
617 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
618 let Literal::String(s) = lit.as_ref() else {
619 unreachable!()
620 };
621 s.to_uppercase()
622 }
623 Expression::Identifier(id) => id.name.to_uppercase(),
624 Expression::Var(v) => v.this.to_uppercase(),
625 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
626 _ => return None,
627 };
628 match name.as_str() {
629 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
630 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
631 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
632 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
633 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
634 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
635 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
636 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
637 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
638 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
639 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
640 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
641 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
642 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
643 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
644 _ => Some(DateTimeField::Custom(name)),
645 }
646}