1use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{
18 BinaryOp, CeilFunc, DateTimeField, Expression, ExtractFunc, Function, LikeOp, Literal, Paren,
19 UnaryFunc, VarArgFunc,
20};
21use crate::generator::GeneratorConfig;
22use crate::tokens::TokenizerConfig;
23
24pub struct HiveDialect;
26
27impl DialectImpl for HiveDialect {
28 fn dialect_type(&self) -> DialectType {
29 DialectType::Hive
30 }
31
32 fn tokenizer_config(&self) -> TokenizerConfig {
33 let mut config = TokenizerConfig::default();
34 config.identifiers.clear();
36 config.identifiers.insert('`', '`');
37 config.quotes.insert("\"".to_string(), "\"".to_string());
39 config.string_escapes.push('\\');
41 config
43 .keywords
44 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
45 config
47 .numeric_literals
48 .insert("L".to_string(), "BIGINT".to_string());
49 config
50 .numeric_literals
51 .insert("S".to_string(), "SMALLINT".to_string());
52 config
53 .numeric_literals
54 .insert("Y".to_string(), "TINYINT".to_string());
55 config
56 .numeric_literals
57 .insert("D".to_string(), "DOUBLE".to_string());
58 config
59 .numeric_literals
60 .insert("F".to_string(), "FLOAT".to_string());
61 config
62 .numeric_literals
63 .insert("BD".to_string(), "DECIMAL".to_string());
64 config.identifiers_can_start_with_digit = true;
66 config
67 }
68
69 fn generator_config(&self) -> GeneratorConfig {
70 use crate::generator::IdentifierQuoteStyle;
71 GeneratorConfig {
72 identifier_quote: '`',
73 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
74 dialect: Some(DialectType::Hive),
75 struct_field_sep: ": ",
77 alias_post_tablesample: true,
79 join_hints: false,
80 identifiers_can_start_with_digit: true,
81 schema_comment_with_eq: false,
83 ..Default::default()
84 }
85 }
86
87 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
88 match expr {
89 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
91 original_name: None,
92 expressions: vec![f.this, f.expression],
93 inferred_type: None,
94 }))),
95
96 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
98 original_name: None,
99 expressions: vec![f.this, f.expression],
100 inferred_type: None,
101 }))),
102
103 Expression::SafeCast(c) => Ok(Expression::Cast(c)),
108
109 Expression::ILike(op) => {
111 let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
112 let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
113 Ok(Expression::Like(Box::new(LikeOp {
114 left: lower_left,
115 right: lower_right,
116 escape: op.escape,
117 quantifier: op.quantifier,
118 inferred_type: None,
119 })))
120 }
121
122 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
124
125 Expression::Explode(f) => Ok(Expression::Explode(f)),
127
128 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
130
131 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
133 seed: None,
134 lower: None,
135 upper: None,
136 }))),
137
138 Expression::Rand(r) => Ok(Expression::Rand(r)),
140
141 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
143 "CONCAT".to_string(),
144 vec![op.left, op.right],
145 )))),
146
147 Expression::Function(f) => self.transform_function(*f),
149
150 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
152
153 _ => Ok(expr),
155 }
156 }
157}
158
159impl HiveDialect {
160 fn transform_function(&self, f: Function) -> Result<Expression> {
161 let name_upper = f.name.to_uppercase();
162 match name_upper.as_str() {
163 "LOG" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
165 "LN".to_string(),
166 f.args,
167 )))),
168
169 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
171 original_name: None,
172 expressions: f.args,
173 inferred_type: None,
174 }))),
175
176 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
178 original_name: None,
179 expressions: f.args,
180 inferred_type: None,
181 }))),
182
183 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
185 original_name: None,
186 expressions: f.args,
187 inferred_type: None,
188 }))),
189
190 "GROUP_CONCAT" if !f.args.is_empty() => {
193 Ok(Expression::Function(Box::new(Function::new(
196 "COLLECT_LIST".to_string(),
197 f.args,
198 ))))
199 }
200
201 "STRING_AGG" if !f.args.is_empty() => {
203 Ok(Expression::Function(Box::new(Function::new(
205 "COLLECT_LIST".to_string(),
206 f.args,
207 ))))
208 }
209
210 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
212 "COLLECT_LIST".to_string(),
213 f.args,
214 )))),
215
216 "SUBSTRING" | "SUBSTR" => Ok(Expression::Function(Box::new(f))),
218
219 "LENGTH" => Ok(Expression::Function(Box::new(f))),
221
222 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
224 f.args.into_iter().next().unwrap(),
225 )))),
226
227 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
229 seed: None,
230 lower: None,
231 upper: None,
232 }))),
233
234 "RAND" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
236 seed: None,
237 lower: None,
238 upper: None,
239 }))),
240
241 "NOW" => Ok(Expression::CurrentTimestamp(
243 crate::expressions::CurrentTimestamp {
244 precision: None,
245 sysdate: false,
246 },
247 )),
248
249 "GETDATE" => Ok(Expression::CurrentTimestamp(
251 crate::expressions::CurrentTimestamp {
252 precision: None,
253 sysdate: false,
254 },
255 )),
256
257 "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
259 crate::expressions::CurrentTimestamp {
260 precision: None,
261 sysdate: false,
262 },
263 )),
264
265 "CURRENT_DATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
267
268 "TO_DATE" => Ok(Expression::Function(Box::new(f))),
270
271 "TO_TIMESTAMP" if f.args.len() == 1 => {
273 Ok(Expression::Function(Box::new(Function::new(
275 "CAST".to_string(),
276 f.args,
277 ))))
278 }
279
280 "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
282
283 "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
285 "DATE_FORMAT".to_string(),
286 f.args,
287 )))),
288
289 "TO_CHAR" => Ok(Expression::Function(Box::new(Function::new(
291 "DATE_FORMAT".to_string(),
292 f.args,
293 )))),
294
295 "DATE_TRUNC" => Ok(Expression::Function(Box::new(Function::new(
297 "TRUNC".to_string(),
298 f.args,
299 )))),
300
301 "TRUNC" => Ok(Expression::Function(Box::new(f))),
303
304 "EXTRACT" => Ok(Expression::Function(Box::new(f))),
306
307 "DATEPART" => Ok(Expression::Function(Box::new(Function::new(
309 "EXTRACT".to_string(),
310 f.args,
311 )))),
312
313 "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
315
316 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
318
319 "POSITION" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
321 "LOCATE".to_string(),
322 f.args,
323 )))),
324
325 "STRPOS" if f.args.len() == 2 => {
327 let mut args = f.args;
328 let first = args.remove(0);
329 let second = args.remove(0);
330 Ok(Expression::Function(Box::new(Function::new(
332 "LOCATE".to_string(),
333 vec![second, first],
334 ))))
335 }
336
337 "CHARINDEX" if f.args.len() >= 2 => {
339 let mut args = f.args;
340 let substring = args.remove(0);
341 let string = args.remove(0);
342 let mut locate_args = vec![substring, string];
344 if !args.is_empty() {
345 locate_args.push(args.remove(0));
346 }
347 Ok(Expression::Function(Box::new(Function::new(
348 "LOCATE".to_string(),
349 locate_args,
350 ))))
351 }
352
353 "INSTR" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
355 "LOCATE".to_string(),
356 f.args,
357 )))),
358
359 "LOCATE" => Ok(Expression::Function(Box::new(f))),
361
362 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
364 this: f.args.into_iter().next().unwrap(),
365 decimals: None,
366 to: None,
367 }))),
368
369 "CEIL" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
371 this: f.args.into_iter().next().unwrap(),
372 decimals: None,
373 to: None,
374 }))),
375
376 "UNNEST" => Ok(Expression::Function(Box::new(Function::new(
378 "EXPLODE".to_string(),
379 f.args,
380 )))),
381
382 "FLATTEN" => Ok(Expression::Function(Box::new(Function::new(
384 "EXPLODE".to_string(),
385 f.args,
386 )))),
387
388 "ARRAY_AGG" => Ok(Expression::Function(Box::new(Function::new(
390 "COLLECT_LIST".to_string(),
391 f.args,
392 )))),
393
394 "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
396
397 "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
399
400 "ARRAY_LENGTH" | "ARRAY_SIZE" | "CARDINALITY" => Ok(Expression::Function(Box::new(
402 Function::new("SIZE".to_string(), f.args),
403 ))),
404
405 "SIZE" => Ok(Expression::Function(Box::new(f))),
407
408 "SPLIT" => Ok(Expression::Function(Box::new(f))),
410
411 "REGEXP_REPLACE" if f.args.len() > 3 => {
413 let args = f.args[..3].to_vec();
414 Ok(Expression::Function(Box::new(Function::new(
415 "REGEXP_REPLACE".to_string(),
416 args,
417 ))))
418 }
419 "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
421
422 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
424 let subject = f.args[0].clone();
425 let pattern = f.args[1].clone();
426 let group = if f.args.len() >= 6 {
427 let g = &f.args[5];
428 if matches!(g, Expression::Literal(crate::expressions::Literal::Number(n)) if n == "1")
429 {
430 None
431 } else {
432 Some(g.clone())
433 }
434 } else {
435 None
436 };
437 let mut args = vec![subject, pattern];
438 if let Some(g) = group {
439 args.push(g);
440 }
441 Ok(Expression::Function(Box::new(Function::new(
442 "REGEXP_EXTRACT".to_string(),
443 args,
444 ))))
445 }
446
447 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(f))),
449
450 "RLIKE" | "REGEXP_LIKE" => Ok(Expression::Function(Box::new(Function::new(
452 "RLIKE".to_string(),
453 f.args,
454 )))),
455
456 "JSON_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
458 "GET_JSON_OBJECT".to_string(),
459 f.args,
460 )))),
461
462 "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(Function::new(
464 "GET_JSON_OBJECT".to_string(),
465 f.args,
466 )))),
467
468 "GET_JSON_OBJECT" => Ok(Expression::Function(Box::new(f))),
470
471 "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
474 "FROM_JSON".to_string(),
475 f.args,
476 )))),
477
478 "TO_JSON" => Ok(Expression::Function(Box::new(f))),
480
481 "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
483
484 "DATE_ADD" => Ok(Expression::Function(Box::new(f))),
486
487 "DATE_SUB" if f.args.len() == 2 => {
490 let mut args = f.args;
491 let date_arg = args.remove(0);
492 let interval_arg = args.remove(0);
493
494 let effective_interval = match &interval_arg {
496 Expression::Literal(_) | Expression::Column(_) | Expression::Identifier(_) => {
497 interval_arg
498 }
499 _ => Expression::Paren(Box::new(Paren {
500 this: interval_arg,
501 trailing_comments: Vec::new(),
502 })),
503 };
504
505 let negated_interval = Expression::Mul(Box::new(BinaryOp {
507 left: effective_interval,
508 right: Expression::Literal(Literal::Number("-1".to_string())),
509 left_comments: Vec::new(),
510 operator_comments: Vec::new(),
511 trailing_comments: Vec::new(),
512 inferred_type: None,
513 }));
514
515 Ok(Expression::Function(Box::new(Function::new(
516 "DATE_ADD".to_string(),
517 vec![date_arg, negated_interval],
518 ))))
519 }
520
521 "ADD_MONTHS" => Ok(Expression::Function(Box::new(f))),
523
524 "MONTHS_BETWEEN" => Ok(Expression::Function(Box::new(f))),
526
527 "NVL" => Ok(Expression::Function(Box::new(f))),
529
530 "NVL2" => Ok(Expression::Function(Box::new(f))),
532
533 "MAP" => Ok(Expression::Function(Box::new(f))),
535
536 "ARRAY" => Ok(Expression::Function(Box::new(f))),
538
539 "STRUCT" => Ok(Expression::Function(Box::new(f))),
541
542 "NAMED_STRUCT" => Ok(Expression::Function(Box::new(f))),
544
545 "DATE_PART" if f.args.len() == 2 => {
547 let mut args = f.args;
548 let part = args.remove(0);
549 let expr = args.remove(0);
550 if let Some(field) = hive_expr_to_datetime_field(&part) {
551 Ok(Expression::Extract(Box::new(ExtractFunc {
552 this: expr,
553 field,
554 })))
555 } else {
556 Ok(Expression::Function(Box::new(Function::new(
557 "DATE_PART".to_string(),
558 vec![part, expr],
559 ))))
560 }
561 }
562
563 _ => Ok(Expression::Function(Box::new(f))),
565 }
566 }
567
568 fn transform_aggregate_function(
569 &self,
570 f: Box<crate::expressions::AggregateFunction>,
571 ) -> Result<Expression> {
572 let name_upper = f.name.to_uppercase();
573 match name_upper.as_str() {
574 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
576 Function::new("COLLECT_LIST".to_string(), f.args),
577 ))),
578
579 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
581 Function::new("COLLECT_LIST".to_string(), f.args),
582 ))),
583
584 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
586 "COLLECT_LIST".to_string(),
587 f.args,
588 )))),
589
590 "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
592 "COLLECT_LIST".to_string(),
593 f.args,
594 )))),
595
596 _ => Ok(Expression::AggregateFunction(f)),
598 }
599 }
600}
601
602fn hive_expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
604 let name = match expr {
605 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
606 Expression::Identifier(id) => id.name.to_uppercase(),
607 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
608 _ => return None,
609 };
610 match name.as_str() {
611 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
612 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
613 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
614 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
615 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
616 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
617 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
618 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
619 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
620 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
621 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
622 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
623 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
624 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
625 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
626 _ => Some(DateTimeField::Custom(name)),
627 }
628}