1use super::{DialectImpl, DialectType};
16use crate::error::Result;
17use crate::expressions::{
18 BinaryOp, CeilFunc, DateTimeField, Expression, ExtractFunc, Function, LikeOp, Literal, Paren,
19 UnaryFunc, VarArgFunc,
20};
21use crate::generator::GeneratorConfig;
22use crate::tokens::TokenizerConfig;
23
24pub struct HiveDialect;
26
27impl DialectImpl for HiveDialect {
28 fn dialect_type(&self) -> DialectType {
29 DialectType::Hive
30 }
31
32 fn tokenizer_config(&self) -> TokenizerConfig {
33 let mut config = TokenizerConfig::default();
34 config.identifiers.clear();
36 config.identifiers.insert('`', '`');
37 config.quotes.insert("\"".to_string(), "\"".to_string());
39 config.string_escapes.push('\\');
41 config
43 .keywords
44 .insert("DIV".to_string(), crate::tokens::TokenType::Div);
45 config
47 .numeric_literals
48 .insert("L".to_string(), "BIGINT".to_string());
49 config
50 .numeric_literals
51 .insert("S".to_string(), "SMALLINT".to_string());
52 config
53 .numeric_literals
54 .insert("Y".to_string(), "TINYINT".to_string());
55 config
56 .numeric_literals
57 .insert("D".to_string(), "DOUBLE".to_string());
58 config
59 .numeric_literals
60 .insert("F".to_string(), "FLOAT".to_string());
61 config
62 .numeric_literals
63 .insert("BD".to_string(), "DECIMAL".to_string());
64 config.identifiers_can_start_with_digit = true;
66 config
67 }
68
69 fn generator_config(&self) -> GeneratorConfig {
70 use crate::generator::IdentifierQuoteStyle;
71 GeneratorConfig {
72 identifier_quote: '`',
73 identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
74 dialect: Some(DialectType::Hive),
75 struct_field_sep: ": ",
77 alias_post_tablesample: true,
79 join_hints: false,
80 identifiers_can_start_with_digit: true,
81 schema_comment_with_eq: false,
83 ..Default::default()
84 }
85 }
86
87 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
88 match expr {
89 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
91 original_name: None,
92 expressions: vec![f.this, f.expression],
93 }))),
94
95 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
97 original_name: None,
98 expressions: vec![f.this, f.expression],
99 }))),
100
101 Expression::SafeCast(c) => Ok(Expression::Cast(c)),
106
107 Expression::ILike(op) => {
109 let lower_left = Expression::Lower(Box::new(UnaryFunc::new(op.left)));
110 let lower_right = Expression::Lower(Box::new(UnaryFunc::new(op.right)));
111 Ok(Expression::Like(Box::new(LikeOp {
112 left: lower_left,
113 right: lower_right,
114 escape: op.escape,
115 quantifier: op.quantifier,
116 })))
117 }
118
119 Expression::Unnest(f) => Ok(Expression::Explode(Box::new(UnaryFunc::new(f.this)))),
121
122 Expression::Explode(f) => Ok(Expression::Explode(f)),
124
125 Expression::ExplodeOuter(f) => Ok(Expression::ExplodeOuter(f)),
127
128 Expression::Random(_) => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
130 seed: None,
131 lower: None,
132 upper: None,
133 }))),
134
135 Expression::Rand(r) => Ok(Expression::Rand(r)),
137
138 Expression::Concat(op) => Ok(Expression::Function(Box::new(Function::new(
140 "CONCAT".to_string(),
141 vec![op.left, op.right],
142 )))),
143
144 Expression::Function(f) => self.transform_function(*f),
146
147 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
149
150 _ => Ok(expr),
152 }
153 }
154}
155
156impl HiveDialect {
157 fn transform_function(&self, f: Function) -> Result<Expression> {
158 let name_upper = f.name.to_uppercase();
159 match name_upper.as_str() {
160 "LOG" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
162 "LN".to_string(),
163 f.args,
164 )))),
165
166 "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
168 original_name: None,
169 expressions: f.args,
170 }))),
171
172 "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
174 original_name: None,
175 expressions: f.args,
176 }))),
177
178 "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
180 original_name: None,
181 expressions: f.args,
182 }))),
183
184 "GROUP_CONCAT" if !f.args.is_empty() => {
187 Ok(Expression::Function(Box::new(Function::new(
190 "COLLECT_LIST".to_string(),
191 f.args,
192 ))))
193 }
194
195 "STRING_AGG" if !f.args.is_empty() => {
197 Ok(Expression::Function(Box::new(Function::new(
199 "COLLECT_LIST".to_string(),
200 f.args,
201 ))))
202 }
203
204 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
206 "COLLECT_LIST".to_string(),
207 f.args,
208 )))),
209
210 "SUBSTRING" | "SUBSTR" => Ok(Expression::Function(Box::new(f))),
212
213 "LENGTH" => Ok(Expression::Function(Box::new(f))),
215
216 "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
218 f.args.into_iter().next().unwrap(),
219 )))),
220
221 "RANDOM" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
223 seed: None,
224 lower: None,
225 upper: None,
226 }))),
227
228 "RAND" => Ok(Expression::Rand(Box::new(crate::expressions::Rand {
230 seed: None,
231 lower: None,
232 upper: None,
233 }))),
234
235 "NOW" => Ok(Expression::CurrentTimestamp(
237 crate::expressions::CurrentTimestamp {
238 precision: None,
239 sysdate: false,
240 },
241 )),
242
243 "GETDATE" => Ok(Expression::CurrentTimestamp(
245 crate::expressions::CurrentTimestamp {
246 precision: None,
247 sysdate: false,
248 },
249 )),
250
251 "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
253 crate::expressions::CurrentTimestamp {
254 precision: None,
255 sysdate: false,
256 },
257 )),
258
259 "CURRENT_DATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
261
262 "TO_DATE" => Ok(Expression::Function(Box::new(f))),
264
265 "TO_TIMESTAMP" if f.args.len() == 1 => {
267 Ok(Expression::Function(Box::new(Function::new(
269 "CAST".to_string(),
270 f.args,
271 ))))
272 }
273
274 "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
276
277 "STRFTIME" => Ok(Expression::Function(Box::new(Function::new(
279 "DATE_FORMAT".to_string(),
280 f.args,
281 )))),
282
283 "TO_CHAR" => Ok(Expression::Function(Box::new(Function::new(
285 "DATE_FORMAT".to_string(),
286 f.args,
287 )))),
288
289 "DATE_TRUNC" => Ok(Expression::Function(Box::new(Function::new(
291 "TRUNC".to_string(),
292 f.args,
293 )))),
294
295 "TRUNC" => Ok(Expression::Function(Box::new(f))),
297
298 "EXTRACT" => Ok(Expression::Function(Box::new(f))),
300
301 "DATEPART" => Ok(Expression::Function(Box::new(Function::new(
303 "EXTRACT".to_string(),
304 f.args,
305 )))),
306
307 "UNIX_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
309
310 "FROM_UNIXTIME" => Ok(Expression::Function(Box::new(f))),
312
313 "POSITION" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
315 "LOCATE".to_string(),
316 f.args,
317 )))),
318
319 "STRPOS" if f.args.len() == 2 => {
321 let mut args = f.args;
322 let first = args.remove(0);
323 let second = args.remove(0);
324 Ok(Expression::Function(Box::new(Function::new(
326 "LOCATE".to_string(),
327 vec![second, first],
328 ))))
329 }
330
331 "CHARINDEX" if f.args.len() >= 2 => {
333 let mut args = f.args;
334 let substring = args.remove(0);
335 let string = args.remove(0);
336 let mut locate_args = vec![substring, string];
338 if !args.is_empty() {
339 locate_args.push(args.remove(0));
340 }
341 Ok(Expression::Function(Box::new(Function::new(
342 "LOCATE".to_string(),
343 locate_args,
344 ))))
345 }
346
347 "INSTR" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
349 "LOCATE".to_string(),
350 f.args,
351 )))),
352
353 "LOCATE" => Ok(Expression::Function(Box::new(f))),
355
356 "CEILING" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
358 this: f.args.into_iter().next().unwrap(),
359 decimals: None,
360 to: None,
361 }))),
362
363 "CEIL" if f.args.len() == 1 => Ok(Expression::Ceil(Box::new(CeilFunc {
365 this: f.args.into_iter().next().unwrap(),
366 decimals: None,
367 to: None,
368 }))),
369
370 "UNNEST" => Ok(Expression::Function(Box::new(Function::new(
372 "EXPLODE".to_string(),
373 f.args,
374 )))),
375
376 "FLATTEN" => Ok(Expression::Function(Box::new(Function::new(
378 "EXPLODE".to_string(),
379 f.args,
380 )))),
381
382 "ARRAY_AGG" => Ok(Expression::Function(Box::new(Function::new(
384 "COLLECT_LIST".to_string(),
385 f.args,
386 )))),
387
388 "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
390
391 "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
393
394 "ARRAY_LENGTH" | "ARRAY_SIZE" | "CARDINALITY" => Ok(Expression::Function(Box::new(
396 Function::new("SIZE".to_string(), f.args),
397 ))),
398
399 "SIZE" => Ok(Expression::Function(Box::new(f))),
401
402 "SPLIT" => Ok(Expression::Function(Box::new(f))),
404
405 "REGEXP_REPLACE" if f.args.len() > 3 => {
407 let args = f.args[..3].to_vec();
408 Ok(Expression::Function(Box::new(Function::new(
409 "REGEXP_REPLACE".to_string(),
410 args,
411 ))))
412 }
413 "REGEXP_REPLACE" => Ok(Expression::Function(Box::new(f))),
415
416 "REGEXP_SUBSTR" if f.args.len() >= 2 => {
418 let subject = f.args[0].clone();
419 let pattern = f.args[1].clone();
420 let group = if f.args.len() >= 6 {
421 let g = &f.args[5];
422 if matches!(g, Expression::Literal(crate::expressions::Literal::Number(n)) if n == "1")
423 {
424 None
425 } else {
426 Some(g.clone())
427 }
428 } else {
429 None
430 };
431 let mut args = vec![subject, pattern];
432 if let Some(g) = group {
433 args.push(g);
434 }
435 Ok(Expression::Function(Box::new(Function::new(
436 "REGEXP_EXTRACT".to_string(),
437 args,
438 ))))
439 }
440
441 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(f))),
443
444 "RLIKE" | "REGEXP_LIKE" => Ok(Expression::Function(Box::new(Function::new(
446 "RLIKE".to_string(),
447 f.args,
448 )))),
449
450 "JSON_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
452 "GET_JSON_OBJECT".to_string(),
453 f.args,
454 )))),
455
456 "JSON_EXTRACT_SCALAR" => Ok(Expression::Function(Box::new(Function::new(
458 "GET_JSON_OBJECT".to_string(),
459 f.args,
460 )))),
461
462 "GET_JSON_OBJECT" => Ok(Expression::Function(Box::new(f))),
464
465 "PARSE_JSON" => Ok(Expression::Function(Box::new(Function::new(
468 "FROM_JSON".to_string(),
469 f.args,
470 )))),
471
472 "TO_JSON" => Ok(Expression::Function(Box::new(f))),
474
475 "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
477
478 "DATE_ADD" => Ok(Expression::Function(Box::new(f))),
480
481 "DATE_SUB" if f.args.len() == 2 => {
484 let mut args = f.args;
485 let date_arg = args.remove(0);
486 let interval_arg = args.remove(0);
487
488 let effective_interval = match &interval_arg {
490 Expression::Literal(_) | Expression::Column(_) | Expression::Identifier(_) => {
491 interval_arg
492 }
493 _ => Expression::Paren(Box::new(Paren {
494 this: interval_arg,
495 trailing_comments: Vec::new(),
496 })),
497 };
498
499 let negated_interval = Expression::Mul(Box::new(BinaryOp {
501 left: effective_interval,
502 right: Expression::Literal(Literal::Number("-1".to_string())),
503 left_comments: Vec::new(),
504 operator_comments: Vec::new(),
505 trailing_comments: Vec::new(),
506 }));
507
508 Ok(Expression::Function(Box::new(Function::new(
509 "DATE_ADD".to_string(),
510 vec![date_arg, negated_interval],
511 ))))
512 }
513
514 "ADD_MONTHS" => Ok(Expression::Function(Box::new(f))),
516
517 "MONTHS_BETWEEN" => Ok(Expression::Function(Box::new(f))),
519
520 "NVL" => Ok(Expression::Function(Box::new(f))),
522
523 "NVL2" => Ok(Expression::Function(Box::new(f))),
525
526 "MAP" => Ok(Expression::Function(Box::new(f))),
528
529 "ARRAY" => Ok(Expression::Function(Box::new(f))),
531
532 "STRUCT" => Ok(Expression::Function(Box::new(f))),
534
535 "NAMED_STRUCT" => Ok(Expression::Function(Box::new(f))),
537
538 "DATE_PART" if f.args.len() == 2 => {
540 let mut args = f.args;
541 let part = args.remove(0);
542 let expr = args.remove(0);
543 if let Some(field) = hive_expr_to_datetime_field(&part) {
544 Ok(Expression::Extract(Box::new(ExtractFunc {
545 this: expr,
546 field,
547 })))
548 } else {
549 Ok(Expression::Function(Box::new(Function::new(
550 "DATE_PART".to_string(),
551 vec![part, expr],
552 ))))
553 }
554 }
555
556 _ => Ok(Expression::Function(Box::new(f))),
558 }
559 }
560
561 fn transform_aggregate_function(
562 &self,
563 f: Box<crate::expressions::AggregateFunction>,
564 ) -> Result<Expression> {
565 let name_upper = f.name.to_uppercase();
566 match name_upper.as_str() {
567 "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
569 Function::new("COLLECT_LIST".to_string(), f.args),
570 ))),
571
572 "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
574 Function::new("COLLECT_LIST".to_string(), f.args),
575 ))),
576
577 "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
579 "COLLECT_LIST".to_string(),
580 f.args,
581 )))),
582
583 "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
585 "COLLECT_LIST".to_string(),
586 f.args,
587 )))),
588
589 _ => Ok(Expression::AggregateFunction(f)),
591 }
592 }
593}
594
595fn hive_expr_to_datetime_field(expr: &Expression) -> Option<DateTimeField> {
597 let name = match expr {
598 Expression::Literal(Literal::String(s)) => s.to_uppercase(),
599 Expression::Identifier(id) => id.name.to_uppercase(),
600 Expression::Column(col) if col.table.is_none() => col.name.name.to_uppercase(),
601 _ => return None,
602 };
603 match name.as_str() {
604 "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => Some(DateTimeField::Year),
605 "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => Some(DateTimeField::Month),
606 "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => Some(DateTimeField::Day),
607 "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => Some(DateTimeField::Hour),
608 "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => Some(DateTimeField::Minute),
609 "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => Some(DateTimeField::Second),
610 "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => Some(DateTimeField::Millisecond),
611 "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => Some(DateTimeField::Microsecond),
612 "DOW" | "DAYOFWEEK" | "DAYOFWEEK_ISO" | "DW" => Some(DateTimeField::DayOfWeek),
613 "DOY" | "DAYOFYEAR" => Some(DateTimeField::DayOfYear),
614 "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" => Some(DateTimeField::Week),
615 "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => Some(DateTimeField::Quarter),
616 "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => Some(DateTimeField::Epoch),
617 "TIMEZONE" | "TIMEZONE_HOUR" | "TZH" => Some(DateTimeField::TimezoneHour),
618 "TIMEZONE_MINUTE" | "TZM" => Some(DateTimeField::TimezoneMinute),
619 _ => Some(DateTimeField::Custom(name)),
620 }
621}