arrow2 0.12.0

Unofficial implementation of Apache Arrow spec in safe Rust
Documentation
//! Conversion methods for dates and times.

use chrono::{
    format::{parse, Parsed, StrftimeItems},
    Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime,
};

use crate::error::Result;
use crate::{
    array::{Offset, PrimitiveArray, Utf8Array},
    error::Error,
};
use crate::{
    datatypes::{DataType, TimeUnit},
    types::months_days_ns,
};

/// Number of seconds in a day
pub const SECONDS_IN_DAY: i64 = 86_400;
/// Number of milliseconds in a second
pub const MILLISECONDS: i64 = 1_000;
/// Number of microseconds in a second
pub const MICROSECONDS: i64 = 1_000_000;
/// Number of nanoseconds in a second
pub const NANOSECONDS: i64 = 1_000_000_000;
/// Number of milliseconds in a day
pub const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS;
/// Number of days between 0001-01-01 and 1970-01-01
pub const EPOCH_DAYS_FROM_CE: i32 = 719_163;

/// converts a `i32` representing a `date32` to [`NaiveDateTime`]
#[inline]
pub fn date32_to_datetime(v: i32) -> NaiveDateTime {
    NaiveDateTime::from_timestamp(v as i64 * SECONDS_IN_DAY, 0)
}

/// converts a `i32` representing a `date32` to [`NaiveDate`]
#[inline]
pub fn date32_to_date(days: i32) -> NaiveDate {
    NaiveDate::from_num_days_from_ce(EPOCH_DAYS_FROM_CE + days)
}

/// converts a `i64` representing a `date64` to [`NaiveDateTime`]
#[inline]
pub fn date64_to_datetime(v: i64) -> NaiveDateTime {
    NaiveDateTime::from_timestamp(
        // extract seconds from milliseconds
        v / MILLISECONDS,
        // discard extracted seconds and convert milliseconds to nanoseconds
        (v % MILLISECONDS * MICROSECONDS) as u32,
    )
}

/// converts a `i64` representing a `date64` to [`NaiveDate`]
#[inline]
pub fn date64_to_date(milliseconds: i64) -> NaiveDate {
    date64_to_datetime(milliseconds).date()
}

/// converts a `i32` representing a `time32(s)` to [`NaiveDateTime`]
#[inline]
pub fn time32s_to_time(v: i32) -> NaiveTime {
    NaiveTime::from_num_seconds_from_midnight(v as u32, 0)
}

/// converts a `i32` representing a `time32(ms)` to [`NaiveTime`]
#[inline]
pub fn time32ms_to_time(v: i32) -> NaiveTime {
    let v = v as i64;
    let seconds = v / MILLISECONDS;

    let milli_to_nano = 1_000_000;
    let nano = (v - seconds * MILLISECONDS) * milli_to_nano;
    NaiveTime::from_num_seconds_from_midnight(seconds as u32, nano as u32)
}

/// converts a `i64` representing a `time64(us)` to [`NaiveDateTime`]
#[inline]
pub fn time64us_to_time(v: i64) -> NaiveTime {
    NaiveTime::from_num_seconds_from_midnight(
        // extract seconds from microseconds
        (v / MICROSECONDS) as u32,
        // discard extracted seconds and convert microseconds to
        // nanoseconds
        (v % MICROSECONDS * MILLISECONDS) as u32,
    )
}

/// converts a `i64` representing a `time64(ns)` to [`NaiveDateTime`]
#[inline]
pub fn time64ns_to_time(v: i64) -> NaiveTime {
    NaiveTime::from_num_seconds_from_midnight(
        // extract seconds from nanoseconds
        (v / NANOSECONDS) as u32,
        // discard extracted seconds
        (v % NANOSECONDS) as u32,
    )
}

/// converts a `i64` representing a `timestamp(s)` to [`NaiveDateTime`]
#[inline]
pub fn timestamp_s_to_datetime(seconds: i64) -> NaiveDateTime {
    NaiveDateTime::from_timestamp(seconds, 0)
}

/// converts a `i64` representing a `timestamp(ms)` to [`NaiveDateTime`]
#[inline]
pub fn timestamp_ms_to_datetime(v: i64) -> NaiveDateTime {
    NaiveDateTime::from_timestamp(
        // extract seconds from milliseconds
        v / MILLISECONDS,
        // discard extracted seconds and convert milliseconds to nanoseconds
        (v % MILLISECONDS * MICROSECONDS) as u32,
    )
}

/// converts a `i64` representing a `timestamp(us)` to [`NaiveDateTime`]
#[inline]
pub fn timestamp_us_to_datetime(v: i64) -> NaiveDateTime {
    NaiveDateTime::from_timestamp(
        // extract seconds from microseconds
        v / MICROSECONDS,
        // discard extracted seconds and convert microseconds to nanoseconds
        (v % MICROSECONDS * MILLISECONDS) as u32,
    )
}

/// converts a `i64` representing a `timestamp(ns)` to [`NaiveDateTime`]
#[inline]
pub fn timestamp_ns_to_datetime(v: i64) -> NaiveDateTime {
    NaiveDateTime::from_timestamp(
        // extract seconds from nanoseconds
        v / NANOSECONDS,
        // discard extracted seconds
        (v % NANOSECONDS) as u32,
    )
}

/// Converts a timestamp in `time_unit` and `timezone` into [`chrono::DateTime`].
#[inline]
pub fn timestamp_to_naive_datetime(timestamp: i64, time_unit: TimeUnit) -> chrono::NaiveDateTime {
    match time_unit {
        TimeUnit::Second => timestamp_s_to_datetime(timestamp),
        TimeUnit::Millisecond => timestamp_ms_to_datetime(timestamp),
        TimeUnit::Microsecond => timestamp_us_to_datetime(timestamp),
        TimeUnit::Nanosecond => timestamp_ns_to_datetime(timestamp),
    }
}

/// Converts a timestamp in `time_unit` and `timezone` into [`chrono::DateTime`].
#[inline]
pub fn timestamp_to_datetime<T: chrono::TimeZone>(
    timestamp: i64,
    time_unit: TimeUnit,
    timezone: &T,
) -> chrono::DateTime<T> {
    timezone.from_utc_datetime(&timestamp_to_naive_datetime(timestamp, time_unit))
}

/// Calculates the scale factor between two TimeUnits. The function returns the
/// scale that should multiply the TimeUnit "b" to have the same time scale as
/// the TimeUnit "a".
pub fn timeunit_scale(a: TimeUnit, b: TimeUnit) -> f64 {
    match (a, b) {
        (TimeUnit::Second, TimeUnit::Second) => 1.0,
        (TimeUnit::Second, TimeUnit::Millisecond) => 0.001,
        (TimeUnit::Second, TimeUnit::Microsecond) => 0.000_001,
        (TimeUnit::Second, TimeUnit::Nanosecond) => 0.000_000_001,
        (TimeUnit::Millisecond, TimeUnit::Second) => 1_000.0,
        (TimeUnit::Millisecond, TimeUnit::Millisecond) => 1.0,
        (TimeUnit::Millisecond, TimeUnit::Microsecond) => 0.001,
        (TimeUnit::Millisecond, TimeUnit::Nanosecond) => 0.000_001,
        (TimeUnit::Microsecond, TimeUnit::Second) => 1_000_000.0,
        (TimeUnit::Microsecond, TimeUnit::Millisecond) => 1_000.0,
        (TimeUnit::Microsecond, TimeUnit::Microsecond) => 1.0,
        (TimeUnit::Microsecond, TimeUnit::Nanosecond) => 0.001,
        (TimeUnit::Nanosecond, TimeUnit::Second) => 1_000_000_000.0,
        (TimeUnit::Nanosecond, TimeUnit::Millisecond) => 1_000_000.0,
        (TimeUnit::Nanosecond, TimeUnit::Microsecond) => 1_000.0,
        (TimeUnit::Nanosecond, TimeUnit::Nanosecond) => 1.0,
    }
}

/// Parses an offset of the form `"+WX:YZ"` or `"UTC"` into [`FixedOffset`].
/// # Errors
/// If the offset is not in any of the allowed forms.
pub fn parse_offset(offset: &str) -> Result<FixedOffset> {
    if offset == "UTC" {
        return Ok(FixedOffset::east(0));
    }
    let error = "timezone offset must be of the form [-]00:00";

    let mut a = offset.split(':');
    let first = a
        .next()
        .map(Ok)
        .unwrap_or_else(|| Err(Error::InvalidArgumentError(error.to_string())))?;
    let last = a
        .next()
        .map(Ok)
        .unwrap_or_else(|| Err(Error::InvalidArgumentError(error.to_string())))?;
    let hours: i32 = first
        .parse()
        .map_err(|_| Error::InvalidArgumentError(error.to_string()))?;
    let minutes: i32 = last
        .parse()
        .map_err(|_| Error::InvalidArgumentError(error.to_string()))?;

    Ok(FixedOffset::east(hours * 60 * 60 + minutes * 60))
}

/// Parses `value` to `Option<i64>` consistent with the Arrow's definition of timestamp with timezone.
/// `tz` must be built from `timezone` (either via [`parse_offset`] or `chrono-tz`).
#[inline]
pub fn utf8_to_timestamp_ns_scalar<T: chrono::TimeZone>(
    value: &str,
    fmt: &str,
    tz: &T,
) -> Option<i64> {
    let mut parsed = Parsed::new();
    let fmt = StrftimeItems::new(fmt);
    let r = parse(&mut parsed, value, fmt).ok();
    if r.is_some() {
        parsed
            .to_datetime()
            .map(|x| x.naive_utc())
            .map(|x| tz.from_utc_datetime(&x))
            .map(|x| x.timestamp_nanos())
            .ok()
    } else {
        None
    }
}

/// Parses `value` to `Option<i64>` consistent with the Arrow's definition of timestamp without timezone.
#[inline]
pub fn utf8_to_naive_timestamp_ns_scalar(value: &str, fmt: &str) -> Option<i64> {
    let fmt = StrftimeItems::new(fmt);
    let mut parsed = Parsed::new();
    parse(&mut parsed, value, fmt.clone()).ok();
    parsed
        .to_naive_datetime_with_offset(0)
        .map(|x| x.timestamp_nanos())
        .ok()
}

fn utf8_to_timestamp_ns_impl<O: Offset, T: chrono::TimeZone>(
    array: &Utf8Array<O>,
    fmt: &str,
    timezone: String,
    tz: T,
) -> PrimitiveArray<i64> {
    let iter = array
        .iter()
        .map(|x| x.and_then(|x| utf8_to_timestamp_ns_scalar(x, fmt, &tz)));

    PrimitiveArray::from_trusted_len_iter(iter)
        .to(DataType::Timestamp(TimeUnit::Nanosecond, Some(timezone)))
}

/// Parses `value` to a [`chrono_tz::Tz`] with the Arrow's definition of timestamp with a timezone.
#[cfg(feature = "chrono-tz")]
#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))]
pub fn parse_offset_tz(timezone: &str) -> Result<chrono_tz::Tz> {
    timezone.parse::<chrono_tz::Tz>().map_err(|_| {
        Error::InvalidArgumentError(format!("timezone \"{}\" cannot be parsed", timezone))
    })
}

#[cfg(feature = "chrono-tz")]
#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))]
fn chrono_tz_utf_to_timestamp_ns<O: Offset>(
    array: &Utf8Array<O>,
    fmt: &str,
    timezone: String,
) -> Result<PrimitiveArray<i64>> {
    let tz = parse_offset_tz(&timezone)?;
    Ok(utf8_to_timestamp_ns_impl(array, fmt, timezone, tz))
}

#[cfg(not(feature = "chrono-tz"))]
fn chrono_tz_utf_to_timestamp_ns<O: Offset>(
    _: &Utf8Array<O>,
    _: &str,
    timezone: String,
) -> Result<PrimitiveArray<i64>> {
    Err(Error::InvalidArgumentError(format!(
        "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)",
        timezone
    )))
}

/// Parses a [`Utf8Array`] to a timeozone-aware timestamp, i.e. [`PrimitiveArray<i64>`] with type `Timestamp(Nanosecond, Some(timezone))`.
/// # Implementation
/// * parsed values with timezone other than `timezone` are converted to `timezone`.
/// * parsed values without timezone are null. Use [`utf8_to_naive_timestamp_ns`] to parse naive timezones.
/// * Null elements remain null; non-parsable elements are null.
/// The feature `"chrono-tz"` enables IANA and zoneinfo formats for `timezone`.
/// # Error
/// This function errors iff `timezone` is not parsable to an offset.
pub fn utf8_to_timestamp_ns<O: Offset>(
    array: &Utf8Array<O>,
    fmt: &str,
    timezone: String,
) -> Result<PrimitiveArray<i64>> {
    let tz = parse_offset(timezone.as_str());

    if let Ok(tz) = tz {
        Ok(utf8_to_timestamp_ns_impl(array, fmt, timezone, tz))
    } else {
        chrono_tz_utf_to_timestamp_ns(array, fmt, timezone)
    }
}

/// Parses a [`Utf8Array`] to naive timestamp, i.e.
/// [`PrimitiveArray<i64>`] with type `Timestamp(Nanosecond, None)`.
/// Timezones are ignored.
/// Null elements remain null; non-parsable elements are set to null.
pub fn utf8_to_naive_timestamp_ns<O: Offset>(
    array: &Utf8Array<O>,
    fmt: &str,
) -> PrimitiveArray<i64> {
    let iter = array
        .iter()
        .map(|x| x.and_then(|x| utf8_to_naive_timestamp_ns_scalar(x, fmt)));

    PrimitiveArray::from_trusted_len_iter(iter).to(DataType::Timestamp(TimeUnit::Nanosecond, None))
}

fn add_month(year: i32, month: u32, months: i32) -> chrono::NaiveDate {
    let new_year = (year * 12 + (month - 1) as i32 + months) / 12;
    let new_month = (year * 12 + (month - 1) as i32 + months) % 12 + 1;
    chrono::NaiveDate::from_ymd(new_year, new_month as u32, 1)
}

fn get_days_between_months(year: i32, month: u32, months: i32) -> i64 {
    add_month(year, month, months)
        .signed_duration_since(chrono::NaiveDate::from_ymd(year, month, 1))
        .num_days()
}

/// Adds an `interval` to a `timestamp` in `time_unit` units without timezone.
#[inline]
pub fn add_naive_interval(timestamp: i64, time_unit: TimeUnit, interval: months_days_ns) -> i64 {
    // convert seconds to a DateTime of a given offset.
    let datetime = match time_unit {
        TimeUnit::Second => timestamp_s_to_datetime(timestamp),
        TimeUnit::Millisecond => timestamp_ms_to_datetime(timestamp),
        TimeUnit::Microsecond => timestamp_us_to_datetime(timestamp),
        TimeUnit::Nanosecond => timestamp_ns_to_datetime(timestamp),
    };

    // compute the number of days in the interval, which depends on the particular year and month (leap days)
    let delta_days = get_days_between_months(datetime.year(), datetime.month(), interval.months())
        + interval.days() as i64;

    // add; no leap hours are considered
    let new_datetime_tz = datetime
        + chrono::Duration::nanoseconds(delta_days * 24 * 60 * 60 * 1_000_000_000 + interval.ns());

    // convert back to the target unit
    match time_unit {
        TimeUnit::Second => new_datetime_tz.timestamp_millis() / 1000,
        TimeUnit::Millisecond => new_datetime_tz.timestamp_millis(),
        TimeUnit::Microsecond => new_datetime_tz.timestamp_nanos() / 1000,
        TimeUnit::Nanosecond => new_datetime_tz.timestamp_nanos(),
    }
}

/// Adds an `interval` to a `timestamp` in `time_unit` units and timezone `timezone`.
#[inline]
pub fn add_interval<T: chrono::TimeZone>(
    timestamp: i64,
    time_unit: TimeUnit,
    interval: months_days_ns,
    timezone: &T,
) -> i64 {
    // convert seconds to a DateTime of a given offset.
    let datetime_tz = timestamp_to_datetime(timestamp, time_unit, timezone);

    // compute the number of days in the interval, which depends on the particular year and month (leap days)
    let delta_days =
        get_days_between_months(datetime_tz.year(), datetime_tz.month(), interval.months())
            + interval.days() as i64;

    // add; tz will take care of leap hours
    let new_datetime_tz = datetime_tz
        + chrono::Duration::nanoseconds(delta_days * 24 * 60 * 60 * 1_000_000_000 + interval.ns());

    // convert back to the target unit
    match time_unit {
        TimeUnit::Second => new_datetime_tz.timestamp_millis() / 1000,
        TimeUnit::Millisecond => new_datetime_tz.timestamp_millis(),
        TimeUnit::Microsecond => new_datetime_tz.timestamp_nanos() / 1000,
        TimeUnit::Nanosecond => new_datetime_tz.timestamp_nanos(),
    }
}