use sqlparser::parser::ParserError;
use crate::{DataFusionError, Result, ScalarValue};
use std::result;
use std::str::FromStr;
/// Number of seconds in one hour, kept as `f64` because interval quantities are parsed as floats.
const SECONDS_PER_HOUR: f64 = 3_600_f64;
/// Number of nanoseconds in one second, kept as `f64` for the same reason.
const NANOS_PER_SECOND: f64 = 1_000_000_000_f64;
/// File compression types that can be parsed from a string, plus an explicit
/// "not compressed" variant.
///
/// The accepted spellings are defined by the `FromStr` implementation in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionTypeVariant {
/// Parsed from `"GZIP"` or `"GZ"` (any letter case).
GZIP,
/// Parsed from `"BZIP2"` or `"BZ2"` (any letter case).
BZIP2,
/// Parsed from `"XZ"` (any letter case).
XZ,
/// No compression; parsed from the empty string.
UNCOMPRESSED,
}
impl FromStr for CompressionTypeVariant {
    type Err = ParserError;

    /// Parses a compression type name, ignoring letter case.
    ///
    /// Accepts `GZIP`/`GZ`, `BZIP2`/`BZ2`, `XZ`, and the empty string (meaning
    /// uncompressed). Anything else yields a `ParserError` that echoes the
    /// upper-cased input.
    fn from_str(s: &str) -> result::Result<Self, ParserError> {
        let upper = s.to_uppercase();
        let variant = match upper.as_str() {
            "GZIP" | "GZ" => Self::GZIP,
            "BZIP2" | "BZ2" => Self::BZIP2,
            "XZ" => Self::XZ,
            "" => Self::UNCOMPRESSED,
            other => {
                return Err(ParserError::ParserError(format!(
                    "Unsupported file compression type {other}"
                )))
            }
        };
        Ok(variant)
    }
}
/// Renders the canonical name of the compression type: `GZIP`, `BZIP2`, `XZ`,
/// or the empty string for `UNCOMPRESSED`.
///
/// `Display` is implemented rather than `ToString` directly: the standard
/// library's blanket `impl<T: Display> ToString for T` keeps `.to_string()`
/// available to existing callers while also allowing `{}` formatting.
impl std::fmt::Display for CompressionTypeVariant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::GZIP => "GZIP",
            Self::BZIP2 => "BZIP2",
            Self::XZ => "XZ",
            Self::UNCOMPRESSED => "",
        };
        f.write_str(name)
    }
}
impl CompressionTypeVariant {
    /// Returns `true` for every variant except `UNCOMPRESSED`.
    pub const fn is_compressed(&self) -> bool {
        match *self {
            Self::UNCOMPRESSED => false,
            Self::GZIP | Self::BZIP2 | Self::XZ => true,
        }
    }
}
/// Units that may appear in an interval expression.
///
/// Every variant owns a distinct single bit of a `u16`, so a set of units can
/// be tracked in one integer mask (used to detect a unit given twice).
#[derive(Clone, Copy)]
#[repr(u16)]
enum IntervalType {
    Century = 1 << 0,
    Decade = 1 << 1,
    Year = 1 << 2,
    Month = 1 << 3,
    Week = 1 << 4,
    Day = 1 << 5,
    Hour = 1 << 6,
    Minute = 1 << 7,
    Second = 1 << 8,
    Millisecond = 1 << 9,
}
impl FromStr for IntervalType {
    type Err = DataFusionError;

    /// Parses a unit name in singular or plural form, ignoring letter case.
    ///
    /// Unrecognised names produce `DataFusionError::NotImplemented`; the
    /// message quotes the input exactly as the caller spelled it.
    fn from_str(s: &str) -> Result<Self> {
        let normalized = s.to_lowercase();
        let unit = match normalized.as_str() {
            "century" | "centuries" => Self::Century,
            "decade" | "decades" => Self::Decade,
            "year" | "years" => Self::Year,
            "month" | "months" => Self::Month,
            "week" | "weeks" => Self::Week,
            "day" | "days" => Self::Day,
            "hour" | "hours" => Self::Hour,
            "minute" | "minutes" => Self::Minute,
            "second" | "seconds" => Self::Second,
            "millisecond" | "milliseconds" => Self::Millisecond,
            _ => {
                return Err(DataFusionError::NotImplemented(format!(
                    "Unknown interval type: {s}"
                )))
            }
        };
        Ok(unit)
    }
}
pub fn parse_interval(leading_field: &str, value: &str) -> Result<ScalarValue> {
let align_interval_parts =
|month_part: f64, mut day_part: f64, mut nanos_part: f64| -> (i64, i64, f64) {
day_part += (month_part - (month_part as i64) as f64) * 30_f64;
nanos_part += (day_part - ((day_part as i64) as f64))
* 24_f64
* SECONDS_PER_HOUR
* NANOS_PER_SECOND;
(month_part as i64, day_part as i64, nanos_part)
};
let mut used_interval_types = 0;
let mut calculate_from_part = |interval_period_str: &str,
interval_type: &str|
-> Result<(i64, i64, f64)> {
let interval_period = match f64::from_str(interval_period_str) {
Ok(n) => n,
Err(_) => {
return Err(DataFusionError::NotImplemented(format!(
"Unsupported Interval Expression with value {value:?}"
)));
}
};
if interval_period > (i64::MAX as f64) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {value:?}"
)));
}
let it = IntervalType::from_str(interval_type).map_err(|_| {
DataFusionError::NotImplemented(format!(
"Invalid input syntax for type interval: {value:?}"
))
})?;
if used_interval_types & (it as u16) != 0 {
return Err(DataFusionError::SQL(ParserError::ParserError(format!(
"Invalid input syntax for type interval: {value:?}. Repeated type '{interval_type}'"
))));
} else {
used_interval_types |= it as u16;
}
match it {
IntervalType::Century => {
Ok(align_interval_parts(interval_period * 1200_f64, 0.0, 0.0))
}
IntervalType::Decade => {
Ok(align_interval_parts(interval_period * 120_f64, 0.0, 0.0))
}
IntervalType::Year => {
Ok(align_interval_parts(interval_period * 12_f64, 0.0, 0.0))
}
IntervalType::Month => Ok(align_interval_parts(interval_period, 0.0, 0.0)),
IntervalType::Week => {
Ok(align_interval_parts(0.0, interval_period * 7_f64, 0.0))
}
IntervalType::Day => Ok(align_interval_parts(0.0, interval_period, 0.0)),
IntervalType::Hour => {
Ok((0, 0, interval_period * SECONDS_PER_HOUR * NANOS_PER_SECOND))
}
IntervalType::Minute => {
Ok((0, 0, interval_period * 60_f64 * NANOS_PER_SECOND))
}
IntervalType::Second => Ok((0, 0, interval_period * NANOS_PER_SECOND)),
IntervalType::Millisecond => Ok((0, 0, interval_period * 1_000_000f64)),
}
};
let mut result_month: i64 = 0;
let mut result_days: i64 = 0;
let mut result_nanos: i128 = 0;
let mut parts = value.split_whitespace();
loop {
let interval_period_str = parts.next();
if interval_period_str.is_none() {
break;
}
let unit = parts.next().unwrap_or(leading_field);
let (diff_month, diff_days, diff_nanos) =
calculate_from_part(interval_period_str.unwrap(), unit)?;
result_month += diff_month;
if result_month > (i32::MAX as i64) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {value:?}"
)));
}
result_days += diff_days;
if result_days > (i32::MAX as i64) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {value:?}"
)));
}
result_nanos += diff_nanos as i128;
if result_nanos > (i64::MAX as i128) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {value:?}"
)));
}
}
if (result_nanos % 1_000_000 != 0)
|| (result_month != 0 && (result_days != 0 || result_nanos != 0))
{
let result: i128 = ((result_month as i128) << 96)
| (((result_days & u32::MAX as i64) as i128) << 64)
| (result_nanos & u64::MAX as i128);
return Ok(ScalarValue::IntervalMonthDayNano(Some(result)));
}
if result_month != 0 {
return Ok(ScalarValue::IntervalYearMonth(Some(result_month as i32)));
}
let result: i64 =
(result_days << 32) | ((result_nanos as i64 / 1_000_000) & (u32::MAX as i64));
Ok(ScalarValue::IntervalDayTime(Some(result)))
}
// Unit tests for `parse_interval`. Every call passes "months" as the leading
// field; all inputs below carry explicit units, so it is never consulted.
#[cfg(test)]
mod test {
use super::*;
use crate::assert_contains;
// Milliseconds per second, used to build the expected day-time values below.
const MILLIS_PER_SECOND: f64 = 1_000_f64;
// Inputs made only of year/month units are compared against
// `ScalarValue::new_interval_ym`; a misspelled unit must be reported as invalid syntax.
#[test]
fn test_parse_ym() {
assert_eq!(
parse_interval("months", "1 month").unwrap(),
ScalarValue::new_interval_ym(0, 1)
);
assert_eq!(
parse_interval("months", "2 month").unwrap(),
ScalarValue::new_interval_ym(0, 2)
);
assert_eq!(
parse_interval("months", "3 year 1 month").unwrap(),
ScalarValue::new_interval_ym(3, 1)
);
assert_contains!(
parse_interval("months", "1 centurys 1 month")
.unwrap_err()
.to_string(),
r#"Invalid input syntax for type interval: "1 centurys 1 month""#
);
assert_eq!(
parse_interval("months", "3 year -1 month").unwrap(),
ScalarValue::new_interval_ym(3, -1)
);
assert_eq!(
parse_interval("months", "-3 year -1 month").unwrap(),
ScalarValue::new_interval_ym(-3, -1)
);
assert_eq!(
parse_interval("months", "-3 year 1 month").unwrap(),
ScalarValue::new_interval_ym(-3, 1)
);
}
// Inputs made of day and whole-millisecond time units (no months) are compared
// against `ScalarValue::new_interval_dt`, including negative components.
#[test]
fn test_dt() {
assert_eq!(
parse_interval("months", "5 days").unwrap(),
ScalarValue::new_interval_dt(5, 0)
);
assert_eq!(
parse_interval("months", "7 days 3 hours").unwrap(),
ScalarValue::new_interval_dt(
7,
(3.0 * SECONDS_PER_HOUR * MILLIS_PER_SECOND) as i32
)
);
assert_eq!(
parse_interval("months", "7 days 5 minutes").unwrap(),
ScalarValue::new_interval_dt(7, 5 * 60 * MILLIS_PER_SECOND as i32)
);
assert_eq!(
parse_interval("months", "7 days -5 minutes").unwrap(),
ScalarValue::new_interval_dt(7, -5 * 60 * MILLIS_PER_SECOND as i32)
);
assert_eq!(
parse_interval("months", "-7 days 5 hours").unwrap(),
ScalarValue::new_interval_dt(-7, 5 * 60 * 60 * MILLIS_PER_SECOND as i32)
);
assert_eq!(
parse_interval("months", "-7 days -5 hours -5 minutes -5 seconds").unwrap(),
ScalarValue::new_interval_dt(
-7,
-(5 * 60 * 60 + 5 * 60 + 5) * MILLIS_PER_SECOND as i32
)
);
}
// Inputs that mix months with days/time, or that need sub-millisecond
// precision, are compared against `ScalarValue::new_interval_mdn`.
#[test]
fn test_mdn() {
assert_eq!(
parse_interval("months", "1 year 25 millisecond").unwrap(),
ScalarValue::new_interval_mdn(12, 0, 25 * 1_000_000)
);
assert_eq!(
parse_interval("months", "1 year 1 day 0.000000001 seconds").unwrap(),
ScalarValue::new_interval_mdn(12, 1, 1)
);
assert_eq!(
parse_interval("months", "1 year 1 day 0.1 milliseconds").unwrap(),
ScalarValue::new_interval_mdn(12, 1, 1_00 * 1_000)
);
assert_eq!(
parse_interval("months", "1 month -1 second").unwrap(),
ScalarValue::new_interval_mdn(1, 0, -1_000_000_000)
);
assert_eq!(
parse_interval("months", "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond").unwrap(),
ScalarValue::new_interval_mdn(-13, -8, -(60 * 60 + 60 + 1) * NANOS_PER_SECOND as i64 - 1_110_000)
);
}
// Naming the same unit twice must fail with a SQL parser error that names the
// repeated unit.
#[test]
fn test_duplicate_interval_type() {
let err = parse_interval("months", "1 month 1 second 1 second")
.expect_err("parsing interval should have failed");
assert_eq!(
r#"SQL(ParserError("Invalid input syntax for type interval: \"1 month 1 second 1 second\". Repeated type 'second'"))"#,
format!("{err:?}")
);
}
}