#![forbid(unsafe_code)]
#![warn(rustdoc::broken_intra_doc_links)]
use serde::{Deserialize, Serialize};
use thiserror::Error;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DType {
Null,
Bool,
#[serde(rename = "boolean")]
BoolNullable,
Int64,
#[serde(rename = "Int64")]
Int64Nullable,
Float64,
#[serde(alias = "string", alias = "str")]
Utf8,
Categorical,
Timedelta64,
Datetime64,
Period,
Interval,
Sparse,
}
impl DType {
#[must_use]
pub const fn is_numeric(&self) -> bool {
matches!(self, Self::Int64 | Self::Int64Nullable | Self::Float64)
}
#[must_use]
pub const fn is_integer(&self) -> bool {
matches!(self, Self::Int64 | Self::Int64Nullable)
}
#[must_use]
pub const fn is_floating(&self) -> bool {
matches!(self, Self::Float64)
}
#[must_use]
pub const fn is_bool(&self) -> bool {
matches!(self, Self::Bool | Self::BoolNullable)
}
#[must_use]
pub const fn is_object(&self) -> bool {
matches!(self, Self::Utf8)
}
#[must_use]
pub const fn is_datetime(&self) -> bool {
matches!(self, Self::Datetime64)
}
#[must_use]
pub const fn is_timedelta(&self) -> bool {
matches!(self, Self::Timedelta64)
}
#[must_use]
pub const fn is_categorical(&self) -> bool {
matches!(self, Self::Categorical)
}
#[must_use]
pub const fn is_sparse(&self) -> bool {
matches!(self, Self::Sparse)
}
#[must_use]
pub const fn is_period(&self) -> bool {
matches!(self, Self::Period)
}
#[must_use]
pub const fn is_interval(&self) -> bool {
matches!(self, Self::Interval)
}
#[must_use]
pub const fn name(&self) -> &'static str {
match self {
Self::Bool => "bool",
Self::BoolNullable => "boolean",
Self::Int64 => "int64",
Self::Int64Nullable => "Int64",
Self::Float64 => "float64",
Self::Utf8 => "object",
Self::Datetime64 => "datetime64[ns]",
Self::Timedelta64 => "timedelta64[ns]",
Self::Categorical => "category",
Self::Period => "period",
Self::Interval => "interval",
Self::Sparse => "Sparse",
Self::Null => "object",
}
}
#[must_use]
pub const fn kind(&self) -> char {
match self {
Self::Bool | Self::BoolNullable => 'b',
Self::Int64 | Self::Int64Nullable => 'i',
Self::Float64 => 'f',
Self::Utf8 => 'O',
Self::Datetime64 => 'M',
Self::Timedelta64 => 'm',
Self::Categorical => 'O',
Self::Period => 'O',
Self::Interval => 'O',
Self::Sparse => 'O',
Self::Null => 'O',
}
}
#[must_use]
pub const fn itemsize(&self) -> usize {
match self {
Self::Bool | Self::BoolNullable => 1,
Self::Int64
| Self::Int64Nullable
| Self::Float64
| Self::Datetime64
| Self::Timedelta64
| Self::Period => 8,
Self::Utf8 | Self::Categorical | Self::Interval | Self::Sparse | Self::Null => 8,
}
}
#[must_use]
pub const fn is_extension(&self) -> bool {
matches!(
self,
Self::Categorical
| Self::Sparse
| Self::Period
| Self::Interval
| Self::Int64Nullable
| Self::BoolNullable
)
}
#[must_use]
pub const fn is_nullable(&self) -> bool {
matches!(self, Self::Int64Nullable | Self::BoolNullable)
}
#[must_use]
pub const fn to_non_nullable(&self) -> Self {
match self {
Self::Int64Nullable => Self::Int64,
Self::BoolNullable => Self::Bool,
other => *other,
}
}
#[must_use]
pub const fn to_nullable(&self) -> Self {
match self {
Self::Int64 => Self::Int64Nullable,
Self::Bool => Self::BoolNullable,
other => *other,
}
}
#[must_use]
pub const fn is_signed_integer(&self) -> bool {
matches!(self, Self::Int64 | Self::Int64Nullable)
}
#[must_use]
pub const fn is_string_dtype(&self) -> bool {
matches!(self, Self::Utf8)
}
#[must_use]
pub const fn is_any_real_numeric(&self) -> bool {
self.is_numeric()
}
#[must_use]
pub const fn is_datetime_like(&self) -> bool {
matches!(self, Self::Datetime64 | Self::Timedelta64 | Self::Period)
}
#[must_use]
pub const fn char(&self) -> char {
match self {
Self::Bool | Self::BoolNullable => '?',
Self::Int64 | Self::Int64Nullable => 'l',
Self::Float64 => 'd',
Self::Utf8 => 'O',
Self::Datetime64 => 'M',
Self::Timedelta64 => 'm',
Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => 'O',
}
}
#[must_use]
pub const fn num(&self) -> i32 {
match self {
Self::Bool | Self::BoolNullable => 0,
Self::Int64 | Self::Int64Nullable => 7,
Self::Float64 => 12,
Self::Utf8 => 17,
Self::Datetime64 => 21,
Self::Timedelta64 => 22,
Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => 17,
}
}
#[must_use]
pub const fn byteorder(&self) -> char {
'='
}
#[must_use]
pub const fn str_repr(&self) -> &'static str {
match self {
Self::Bool | Self::BoolNullable => "|b1",
Self::Int64 | Self::Int64Nullable => "<i8",
Self::Float64 => "<f8",
Self::Utf8 => "|O8",
Self::Datetime64 => "<M8[ns]",
Self::Timedelta64 => "<m8[ns]",
Self::Categorical | Self::Period | Self::Interval | Self::Sparse | Self::Null => "|O8",
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SparseDType {
pub value_dtype: DType,
pub fill_value: Scalar,
}
impl SparseDType {
pub fn new(value_dtype: DType, fill_value: Scalar) -> Result<Self, TypeError> {
if matches!(value_dtype, DType::Null | DType::Sparse) {
return Err(TypeError::InvalidSparseValueDType { dtype: value_dtype });
}
let fill_value = if fill_value.is_missing() {
Scalar::missing_for_dtype(value_dtype)
} else {
cast_scalar_owned(fill_value, value_dtype)?
};
Ok(Self {
value_dtype,
fill_value,
})
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NullKind {
Null,
NaN,
NaT,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", content = "value", rename_all = "snake_case")]
pub enum Scalar {
Null(NullKind),
Bool(bool),
Int64(i64),
Float64(f64),
#[serde(alias = "string", alias = "str")]
Utf8(String),
Timedelta64(i64),
Datetime64(i64),
Period(Period),
Interval(Interval),
}
impl std::fmt::Display for Scalar {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Null(NullKind::NaN) => write!(f, "NaN"),
Self::Null(NullKind::NaT) => write!(f, "NaT"),
Self::Null(NullKind::Null) => write!(f, "None"),
Self::Bool(b) => write!(f, "{}", if *b { "True" } else { "False" }),
Self::Int64(v) => write!(f, "{v}"),
Self::Float64(v) => write!(f, "{v}"),
Self::Utf8(s) => write!(f, "{s}"),
Self::Timedelta64(nanos) => write!(f, "{}", Timedelta::format(*nanos)),
Self::Datetime64(nanos) => {
if *nanos == Timestamp::NAT {
write!(f, "NaT")
} else {
write!(f, "Timestamp[{nanos}]")
}
}
Self::Period(p) => {
if p.ordinal == i64::MIN {
write!(f, "NaT")
} else {
write!(f, "{}", p.calendar_string())
}
}
Self::Interval(interval) => write!(f, "{interval}"),
}
}
}
impl From<bool> for Scalar {
fn from(value: bool) -> Self {
Self::Bool(value)
}
}
impl From<i64> for Scalar {
fn from(value: i64) -> Self {
Self::Int64(value)
}
}
impl From<f64> for Scalar {
fn from(value: f64) -> Self {
Self::Float64(value)
}
}
impl From<&str> for Scalar {
fn from(value: &str) -> Self {
Self::Utf8(value.to_owned())
}
}
impl From<String> for Scalar {
fn from(value: String) -> Self {
Self::Utf8(value)
}
}
impl Scalar {
#[must_use]
pub fn dtype(&self) -> DType {
match self {
Self::Null(_) => DType::Null,
Self::Bool(_) => DType::Bool,
Self::Int64(_) => DType::Int64,
Self::Float64(_) => DType::Float64,
Self::Utf8(_) => DType::Utf8,
Self::Timedelta64(_) => DType::Timedelta64,
Self::Datetime64(_) => DType::Datetime64,
Self::Period(_) => DType::Period,
Self::Interval(_) => DType::Interval,
}
}
#[must_use]
pub fn is_missing(&self) -> bool {
match self {
Self::Null(_) => true,
Self::Float64(v) => v.is_nan(),
Self::Timedelta64(v) => *v == Timedelta::NAT,
Self::Datetime64(v) => *v == Timestamp::NAT,
Self::Period(p) => p.ordinal == i64::MIN,
_ => false,
}
}
#[must_use]
pub fn is_nan(&self) -> bool {
matches!(self, Self::Null(NullKind::NaN)) || matches!(self, Self::Float64(v) if v.is_nan())
}
#[must_use]
pub const fn is_bool(&self) -> bool {
matches!(self, Self::Bool(_))
}
#[must_use]
pub const fn is_integer(&self) -> bool {
matches!(self, Self::Int64(_))
}
#[must_use]
pub const fn is_float(&self) -> bool {
matches!(self, Self::Float64(_))
}
#[must_use]
pub const fn is_numeric(&self) -> bool {
matches!(self, Self::Int64(_) | Self::Float64(_))
}
#[must_use]
pub const fn is_string(&self) -> bool {
matches!(self, Self::Utf8(_))
}
#[must_use]
pub const fn is_datetime(&self) -> bool {
matches!(self, Self::Datetime64(_))
}
#[must_use]
pub const fn is_timedelta(&self) -> bool {
matches!(self, Self::Timedelta64(_))
}
#[must_use]
pub const fn is_period(&self) -> bool {
matches!(self, Self::Period(_))
}
#[must_use]
pub const fn is_interval(&self) -> bool {
matches!(self, Self::Interval(_))
}
#[must_use]
pub fn missing_for_dtype(dtype: DType) -> Self {
match dtype {
DType::Float64 => Self::Null(NullKind::NaN),
DType::Timedelta64 => Self::Timedelta64(Timedelta::NAT),
DType::Datetime64 => Self::Datetime64(Timestamp::NAT),
DType::Period => Self::Period(Period::new(i64::MIN, PeriodFreq::Daily)),
DType::Null => Self::Null(NullKind::Null),
DType::Bool
| DType::BoolNullable
| DType::Int64
| DType::Int64Nullable
| DType::Utf8
| DType::Categorical
| DType::Interval
| DType::Sparse => Self::Null(NullKind::Null),
}
}
#[must_use]
pub fn semantic_eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Float64(a), Self::Float64(b)) => {
if a.is_nan() && b.is_nan() {
return true;
}
if *a == *b {
return true;
}
let diff = (*a - *b).abs();
let max_abs = a.abs().max(b.abs());
if max_abs == 0.0 {
diff < f64::EPSILON
} else {
diff / max_abs < 1e-14
}
}
(Self::Null(_), Self::Float64(v)) | (Self::Float64(v), Self::Null(_)) => v.is_nan(),
(Self::Null(_), Self::Null(_)) => true,
_ => self == other,
}
}
#[must_use]
pub fn semantic_le(&self, other: &Self) -> bool {
match self.semantic_cmp(other) {
std::cmp::Ordering::Less | std::cmp::Ordering::Equal => true,
std::cmp::Ordering::Greater => false,
}
}
#[must_use]
pub fn semantic_ge(&self, other: &Self) -> bool {
match self.semantic_cmp(other) {
std::cmp::Ordering::Greater | std::cmp::Ordering::Equal => true,
std::cmp::Ordering::Less => false,
}
}
#[must_use]
pub fn is_null(&self) -> bool {
matches!(self, Self::Null(_))
}
#[must_use]
pub fn is_na(&self) -> bool {
self.is_missing()
}
#[must_use]
pub fn coalesce(&self, other: &Self) -> Self {
if self.is_missing() {
other.clone()
} else {
self.clone()
}
}
#[must_use]
pub fn semantic_cmp(&self, other: &Self) -> std::cmp::Ordering {
match (self, other) {
(Self::Int64(a), Self::Int64(b)) => a.cmp(b),
(Self::Float64(a), Self::Float64(b)) => {
a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
}
(Self::Utf8(a), Self::Utf8(b)) => a.cmp(b),
(Self::Bool(a), Self::Bool(b)) => a.cmp(b),
(Self::Null(a), Self::Null(b)) => a.cmp(b),
(Self::Timedelta64(a), Self::Timedelta64(b)) => {
if *a == Timedelta::NAT || *b == Timedelta::NAT {
std::cmp::Ordering::Equal
} else {
a.cmp(b)
}
}
(Self::Datetime64(a), Self::Datetime64(b)) => {
if *a == Timestamp::NAT || *b == Timestamp::NAT {
std::cmp::Ordering::Equal
} else {
a.cmp(b)
}
}
(Self::Period(a), Self::Period(b)) => {
if a.ordinal == i64::MIN || b.ordinal == i64::MIN {
std::cmp::Ordering::Equal
} else {
a.ordinal.cmp(&b.ordinal)
}
}
(Self::Interval(a), Self::Interval(b)) => a
.left
.partial_cmp(&b.left)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| {
a.right
.partial_cmp(&b.right)
.unwrap_or(std::cmp::Ordering::Equal)
})
.then_with(|| a.closed.cmp(&b.closed)),
(Self::Int64(a), Self::Float64(b)) => (*a as f64)
.partial_cmp(b)
.unwrap_or(std::cmp::Ordering::Equal),
(Self::Float64(a), Self::Int64(b)) => a
.partial_cmp(&(*b as f64))
.unwrap_or(std::cmp::Ordering::Equal),
(a, b) => format!("{a:?}").cmp(&format!("{b:?}")),
}
}
pub fn to_f64(&self) -> Result<f64, TypeError> {
match self {
Self::Bool(v) => Ok(if *v { 1.0 } else { 0.0 }),
Self::Int64(v) => Ok(*v as f64),
Self::Float64(v) => Ok(*v),
Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
Self::Utf8(v) => Err(TypeError::NonNumericValue {
value: v.clone(),
dtype: DType::Utf8,
}),
Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Timedelta64(v) => Err(TypeError::NonNumericValue {
value: Timedelta::format(*v),
dtype: DType::Timedelta64,
}),
Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Datetime64(v) => Err(TypeError::NonNumericValue {
value: format!("Timestamp[{v}]"),
dtype: DType::Datetime64,
}),
Self::Period(p) if p.ordinal == i64::MIN => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Period(p) => Err(TypeError::NonNumericValue {
value: p.calendar_string(),
dtype: DType::Period,
}),
Self::Interval(v) => Err(TypeError::NonNumericValue {
value: v.to_string(),
dtype: DType::Interval,
}),
}
}
pub fn to_i64(&self) -> Result<i64, TypeError> {
match self {
Self::Bool(v) => Ok(if *v { 1 } else { 0 }),
Self::Int64(v) => Ok(*v),
Self::Float64(v) => Ok(*v as i64),
Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
Self::Utf8(v) => Err(TypeError::NonNumericValue {
value: v.clone(),
dtype: DType::Utf8,
}),
Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Timedelta64(v) => Ok(*v),
Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Datetime64(v) => Ok(*v),
Self::Period(p) if p.ordinal == i64::MIN => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Period(p) => Ok(p.ordinal),
Self::Interval(v) => Err(TypeError::NonNumericValue {
value: v.to_string(),
dtype: DType::Interval,
}),
}
}
pub fn to_bool(&self) -> Result<bool, TypeError> {
match self {
Self::Bool(v) => Ok(*v),
Self::Int64(v) => Ok(*v != 0),
Self::Float64(v) => Ok(*v != 0.0 && !v.is_nan()),
Self::Null(kind) => Err(TypeError::ValueIsMissing { kind: *kind }),
Self::Utf8(v) => Ok(!v.is_empty()),
Self::Timedelta64(v) if *v == Timedelta::NAT => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Timedelta64(v) => Ok(*v != 0),
Self::Datetime64(v) if *v == Timestamp::NAT => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Datetime64(v) => Ok(*v != 0),
Self::Period(p) if p.ordinal == i64::MIN => Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
}),
Self::Period(p) => Ok(p.ordinal != 0),
Self::Interval(_) => Ok(true),
}
}
pub fn to_str(&self) -> String {
match self {
Self::Bool(v) => if *v { "True" } else { "False" }.to_string(),
Self::Int64(v) => v.to_string(),
Self::Float64(v) => {
if v.is_nan() {
"nan".to_string()
} else if v.is_infinite() {
if *v > 0.0 { "inf" } else { "-inf" }.to_string()
} else {
v.to_string()
}
}
Self::Null(_) => "NaN".to_string(),
Self::Utf8(v) => v.clone(),
Self::Timedelta64(v) => Timedelta::format(*v),
Self::Datetime64(v) if *v == Timestamp::NAT => "NaT".to_string(),
Self::Datetime64(v) => Timestamp::from_nanos(*v).isoformat(),
Self::Period(p) if p.ordinal == i64::MIN => "NaT".to_string(),
Self::Period(p) => p.calendar_string(),
Self::Interval(v) => v.to_string(),
}
}
}
#[derive(Debug, Error, Clone, PartialEq)]
pub enum TypeError {
#[error("dtype coercion from {left:?} to {right:?} has no compatible common type")]
IncompatibleDtypes { left: DType, right: DType },
#[error("cannot cast scalar of dtype {from:?} to {to:?}")]
InvalidCast { from: DType, to: DType },
#[error("cannot cast float {value} to int64 without loss")]
LossyFloatToInt { value: f64 },
#[error("expected 0/1 for bool cast from int64 but found {value}")]
InvalidBoolInt { value: i64 },
#[error("expected 0.0/1.0 for bool cast from float64 but found {value}")]
InvalidBoolFloat { value: f64 },
#[error("value {value:?} has non-numeric dtype {dtype:?}")]
NonNumericValue { value: String, dtype: DType },
#[error("value is missing ({kind:?})")]
ValueIsMissing { kind: NullKind },
#[error("sparse value dtype cannot be {dtype:?}")]
InvalidSparseValueDType { dtype: DType },
#[error("interval_range step must be finite, positive, and not NaN (got {step})")]
InvalidIntervalStep { step: f64 },
#[error("interval_range step {step} does not evenly divide range end-start={span}")]
IntervalStepDoesNotDivide { step: f64, span: f64 },
#[error("cannot parse '{value}' as {target}")]
ValueNotParseable { value: String, target: String },
}
pub fn common_dtype(left: DType, right: DType) -> Result<DType, TypeError> {
use DType::{
Bool, BoolNullable, Categorical, Datetime64, Float64, Int64, Int64Nullable, Null, Sparse,
Timedelta64,
};
let out = match (left, right) {
(a, b) if a == b => a,
(Null, other) | (other, Null) => other,
(Categorical, Categorical) => Categorical,
(Bool, Int64) | (Int64, Bool) => Int64,
(Bool, Int64Nullable) | (Int64Nullable, Bool) => Int64Nullable,
(BoolNullable, Int64) | (Int64, BoolNullable) => Int64Nullable,
(BoolNullable, Int64Nullable) | (Int64Nullable, BoolNullable) => Int64Nullable,
(Bool, BoolNullable) | (BoolNullable, Bool) => BoolNullable,
(Bool, Float64) | (Float64, Bool) => Float64,
(BoolNullable, Float64) | (Float64, BoolNullable) => Float64,
(Int64, Float64) | (Float64, Int64) => Float64,
(Int64Nullable, Float64) | (Float64, Int64Nullable) => Float64,
(Int64, Int64Nullable) | (Int64Nullable, Int64) => Int64Nullable,
(Timedelta64, Timedelta64) => Timedelta64,
(Datetime64, Datetime64) => Datetime64,
(Sparse, _) | (_, Sparse) => return Err(TypeError::IncompatibleDtypes { left, right }),
_ => return Err(TypeError::IncompatibleDtypes { left, right }),
};
Ok(out)
}
pub fn infer_dtype(values: &[Scalar]) -> Result<DType, TypeError> {
let mut current = DType::Null;
let mut saw_utf8 = false;
let mut saw_timedelta = false;
let mut saw_datetime = false;
let mut saw_non_utf8_non_null = false;
for value in values {
match value.dtype() {
DType::Null => {}
DType::Utf8 => saw_utf8 = true,
DType::Timedelta64 => {
saw_timedelta = true;
if current == DType::Null {
current = DType::Timedelta64;
} else if current != DType::Timedelta64 {
return Err(TypeError::IncompatibleDtypes {
left: current,
right: DType::Timedelta64,
});
}
}
DType::Datetime64 => {
saw_datetime = true;
if current == DType::Null {
current = DType::Datetime64;
} else if current != DType::Datetime64 {
return Err(TypeError::IncompatibleDtypes {
left: current,
right: DType::Datetime64,
});
}
}
other => {
saw_non_utf8_non_null = true;
current = common_dtype(current, other)?;
}
}
if saw_utf8 && saw_non_utf8_non_null {
return Ok(DType::Utf8);
}
if saw_timedelta && saw_non_utf8_non_null {
return Err(TypeError::IncompatibleDtypes {
left: DType::Timedelta64,
right: current,
});
}
if saw_datetime && saw_non_utf8_non_null {
return Err(TypeError::IncompatibleDtypes {
left: DType::Datetime64,
right: current,
});
}
}
if saw_utf8 {
Ok(DType::Utf8)
} else {
Ok(current)
}
}
pub fn cast_scalar_owned(value: Scalar, target: DType) -> Result<Scalar, TypeError> {
let from = value.dtype();
if from == target {
return Ok(value);
}
if (from == DType::Int64 && target == DType::Int64Nullable)
|| (from == DType::Int64Nullable && target == DType::Int64)
{
return Ok(value);
}
if (from == DType::Bool && target == DType::BoolNullable)
|| (from == DType::BoolNullable && target == DType::Bool)
{
return Ok(value);
}
if target == DType::Utf8 {
return Ok(Scalar::Utf8(scalar_to_string_for_astype(value)));
}
if target == DType::Bool
&& let Scalar::Float64(v) = &value
&& v.is_nan()
{
return Ok(Scalar::Bool(true));
}
if value.is_missing() {
return Ok(Scalar::missing_for_dtype(target));
}
match target {
DType::Null => Ok(Scalar::Null(NullKind::Null)),
DType::Bool => match &value {
Scalar::Int64(v) => Ok(Scalar::Bool(*v != 0)),
Scalar::Float64(v) => Ok(Scalar::Bool(*v != 0.0)),
_ => Err(TypeError::InvalidCast { from, to: target }),
},
DType::BoolNullable => match &value {
Scalar::Bool(b) => Ok(Scalar::Bool(*b)),
Scalar::Int64(0) => Ok(Scalar::Bool(false)),
Scalar::Int64(1) => Ok(Scalar::Bool(true)),
Scalar::Int64(v) => Err(TypeError::InvalidBoolInt { value: *v }),
Scalar::Float64(v) if *v == 0.0 => Ok(Scalar::Bool(false)),
Scalar::Float64(v) if *v == 1.0 => Ok(Scalar::Bool(true)),
Scalar::Float64(v) => Err(TypeError::InvalidBoolFloat { value: *v }),
_ => Err(TypeError::InvalidCast { from, to: target }),
},
DType::Int64 | DType::Int64Nullable => match &value {
Scalar::Bool(v) => Ok(Scalar::Int64(i64::from(*v))),
Scalar::Float64(v) => {
if !v.is_finite() {
return Err(TypeError::LossyFloatToInt { value: *v });
}
if *v < i64::MIN as f64 || *v >= 9223372036854775808.0 {
return Err(TypeError::LossyFloatToInt { value: *v });
}
Ok(Scalar::Int64(*v as i64))
}
Scalar::Utf8(s) => {
if let Ok(v) = s.parse::<i64>() {
return Ok(Scalar::Int64(v));
}
if let Ok(f) = s.parse::<f64>()
&& f.is_finite()
&& f.fract() == 0.0
&& f >= i64::MIN as f64
&& f < 9223372036854775808.0
{
return Ok(Scalar::Int64(f as i64));
}
Err(TypeError::InvalidCast { from, to: target })
}
_ => Err(TypeError::InvalidCast { from, to: target }),
},
DType::Float64 => match &value {
Scalar::Bool(v) => Ok(Scalar::Float64(if *v { 1.0 } else { 0.0 })),
Scalar::Int64(v) => Ok(Scalar::Float64(*v as f64)),
Scalar::Utf8(s) => s
.parse::<f64>()
.map(Scalar::Float64)
.map_err(|_| TypeError::InvalidCast { from, to: target }),
_ => Err(TypeError::InvalidCast { from, to: target }),
},
DType::Utf8 => Ok(Scalar::Utf8(scalar_to_string_for_astype(value))),
DType::Categorical => Err(TypeError::InvalidCast { from, to: target }),
DType::Timedelta64 => match &value {
Scalar::Int64(v) => Ok(Scalar::Timedelta64(*v)),
Scalar::Utf8(s) => Timedelta::parse(s)
.map(Scalar::Timedelta64)
.map_err(|_| TypeError::InvalidCast { from, to: target }),
_ => Err(TypeError::InvalidCast { from, to: target }),
},
DType::Datetime64 => match &value {
Scalar::Int64(v) => Ok(Scalar::Datetime64(*v)),
Scalar::Utf8(s) => Timestamp::parse(s)
.map(|timestamp| Scalar::Datetime64(timestamp.nanos))
.map_err(|_| TypeError::InvalidCast { from, to: target }),
_ => Err(TypeError::InvalidCast { from, to: target }),
},
DType::Period => match &value {
Scalar::Int64(v) => Ok(Scalar::Period(Period::new(*v, PeriodFreq::Daily))),
Scalar::Utf8(s) => Period::parse(s)
.map(Scalar::Period)
.map_err(|_| TypeError::InvalidCast { from, to: target }),
_ => Err(TypeError::InvalidCast { from, to: target }),
},
DType::Interval => match &value {
Scalar::Utf8(s) => Interval::parse(s)
.map(Scalar::Interval)
.map_err(|_| TypeError::InvalidCast { from, to: target }),
_ => Err(TypeError::InvalidCast { from, to: target }),
},
DType::Sparse => Err(TypeError::InvalidCast { from, to: target }),
}
}
fn scalar_to_string_for_astype(value: Scalar) -> String {
match value {
Scalar::Null(NullKind::Null) => "None".to_owned(),
Scalar::Null(NullKind::NaN) => "nan".to_owned(),
Scalar::Null(NullKind::NaT) => "NaT".to_owned(),
Scalar::Bool(true) => "True".to_owned(),
Scalar::Bool(false) => "False".to_owned(),
Scalar::Int64(v) => v.to_string(),
Scalar::Float64(v) => float_to_string_for_astype(v),
Scalar::Utf8(s) => s,
Scalar::Timedelta64(v) if v == Timedelta::NAT => "NaT".to_owned(),
Scalar::Timedelta64(v) => Timedelta::format(v),
Scalar::Datetime64(v) if v == Timestamp::NAT => "NaT".to_owned(),
Scalar::Datetime64(v) => format!("Timestamp[{v}]"),
Scalar::Period(p) if p.ordinal == i64::MIN => "NaT".to_owned(),
Scalar::Period(p) => p.calendar_string(),
Scalar::Interval(v) => v.to_string(),
}
}
fn float_to_string_for_astype(value: f64) -> String {
if value.is_nan() {
return "nan".to_owned();
}
if value.is_infinite() {
return value.to_string(); }
let s = format!("{value:?}");
match s.split_once('e') {
None => s,
Some((mantissa, exp)) => {
let (sign, digits) = match exp.strip_prefix('-') {
Some(d) => ('-', d),
None => ('+', exp.strip_prefix('+').unwrap_or(exp)),
};
format!("{mantissa}e{sign}{digits:0>2}")
}
}
}
pub fn cast_scalar(value: &Scalar, target: DType) -> Result<Scalar, TypeError> {
cast_scalar_owned(value.clone(), target)
}
#[derive(Debug, Error, Clone, PartialEq)]
pub enum TimedeltaError {
#[error("invalid timedelta string: {0}")]
InvalidFormat(String),
#[error("overflow in timedelta computation")]
Overflow,
}
#[derive(Debug, Clone, Copy, Default)]
pub struct TimedeltaComponents {
pub days: i64,
pub hours: i64,
pub minutes: i64,
pub seconds: i64,
pub milliseconds: i64,
pub microseconds: i64,
pub nanoseconds: i64,
}
pub struct Timedelta;
impl Timedelta {
pub const NANOS_PER_MICRO: i64 = 1_000;
pub const NANOS_PER_MILLI: i64 = 1_000_000;
pub const NANOS_PER_SEC: i64 = 1_000_000_000;
pub const NANOS_PER_MIN: i64 = 60 * Self::NANOS_PER_SEC;
pub const NANOS_PER_HOUR: i64 = 60 * Self::NANOS_PER_MIN;
pub const NANOS_PER_DAY: i64 = 24 * Self::NANOS_PER_HOUR;
pub const NANOS_PER_WEEK: i64 = 7 * Self::NANOS_PER_DAY;
pub const NAT: i64 = i64::MIN;
pub fn parse(s: &str) -> Result<i64, TimedeltaError> {
let s = s.trim();
if s.eq_ignore_ascii_case("nat") {
return Ok(Self::NAT);
}
let (negative, s) = if let Some(rest) = s.strip_prefix('-') {
(true, rest.trim())
} else {
(false, s)
};
if let Some(nanos) = Self::try_parse_time_format(s) {
return Ok(if negative { -nanos } else { nanos });
}
if let Some(nanos) = Self::try_parse_iso8601_duration(s) {
return Ok(if negative { -nanos } else { nanos });
}
let nanos = Self::parse_compound(s)?;
Ok(if negative { -nanos } else { nanos })
}
fn try_parse_iso8601_duration(s: &str) -> Option<i64> {
let mut rest = s.strip_prefix('P')?;
if rest.is_empty() {
return None;
}
let mut total: i64 = 0;
let mut saw_component = false;
while !rest.is_empty() {
if let Some(after_t) = rest.strip_prefix('T') {
rest = after_t;
continue;
}
let num_end = rest.find(|c: char| !c.is_ascii_digit() && c != '.')?;
if num_end == 0 {
return None;
}
let num_str = &rest[..num_end];
let unit = rest.as_bytes()[num_end];
let is_fractional = num_str.contains('.');
rest = &rest[num_end + 1..];
let (multiplier, frac_ok) = match unit {
b'W' => (Self::NANOS_PER_WEEK, false),
b'D' => (Self::NANOS_PER_DAY, false),
b'H' => (Self::NANOS_PER_HOUR, false),
b'M' => (Self::NANOS_PER_MIN, false),
b'S' => (Self::NANOS_PER_SEC, true),
_ => return None,
};
if is_fractional {
if !frac_ok {
return None;
}
let value: f64 = num_str.parse().ok()?;
let product = value * multiplier as f64;
if !product.is_finite() || product.abs() >= 9223372036854775808.0 {
return None;
}
total = total.checked_add(product.round() as i64)?;
} else {
let value: i64 = num_str.parse().ok()?;
total = total.checked_add(value.checked_mul(multiplier)?)?;
}
saw_component = true;
}
saw_component.then_some(total)
}
fn try_parse_time_format(s: &str) -> Option<i64> {
let parts: Vec<&str> = s.split(':').collect();
if parts.len() < 2 || parts.len() > 3 {
return None;
}
let hours: i64 = parts[0].parse().ok()?;
let minutes: i64 = parts[1].parse().ok()?;
let (seconds, frac_nanos) = if parts.len() == 3 {
if let Some((sec_str, frac_str)) = parts[2].split_once('.') {
let sec: i64 = sec_str.parse().ok()?;
if !frac_str.bytes().all(|byte| byte.is_ascii_digit()) {
return None;
}
let mut frac = 0_i64;
let taken = frac_str.len().min(9);
for byte in frac_str.bytes().take(9) {
frac = frac * 10 + i64::from(byte - b'0');
}
for _ in taken..9 {
frac *= 10;
}
(sec, frac)
} else {
let sec: i64 = parts[2].parse().ok()?;
(sec, 0)
}
} else {
(0, 0)
};
hours
.checked_mul(Self::NANOS_PER_HOUR)?
.checked_add(minutes.checked_mul(Self::NANOS_PER_MIN)?)?
.checked_add(seconds.checked_mul(Self::NANOS_PER_SEC)?)?
.checked_add(frac_nanos)
}
fn parse_compound(s: &str) -> Result<i64, TimedeltaError> {
let mut total: i64 = 0;
let mut remaining = s;
while !remaining.is_empty() {
remaining = remaining.trim_start();
if remaining.is_empty() {
break;
}
if remaining.contains(':')
&& let Some(time_nanos) = Self::try_parse_time_format(remaining)
{
total = total
.checked_add(time_nanos)
.ok_or(TimedeltaError::Overflow)?;
break;
}
let num_end = remaining
.find(|c: char| !c.is_ascii_digit() && c != '.' && c != '-')
.unwrap_or(remaining.len());
if num_end == 0 {
return Err(TimedeltaError::InvalidFormat(s.to_string()));
}
let num_str = &remaining[..num_end];
let num: f64 = num_str
.parse()
.map_err(|_| TimedeltaError::InvalidFormat(s.to_string()))?;
remaining = remaining[num_end..].trim_start();
let unit_end = remaining
.find(|c: char| c.is_ascii_digit() || c.is_whitespace())
.unwrap_or(remaining.len());
let unit = &remaining[..unit_end];
remaining = &remaining[unit_end..];
let multiplier = Self::unit_to_nanos(unit)
.ok_or_else(|| TimedeltaError::InvalidFormat(s.to_string()))?;
let product = num * multiplier as f64;
if !product.is_finite() || product.abs() >= 9223372036854775808.0 {
return Err(TimedeltaError::Overflow);
}
let nanos = product.round() as i64;
total = total.checked_add(nanos).ok_or(TimedeltaError::Overflow)?;
}
if total == 0 && !s.trim().is_empty() && s.trim() != "0" {
return Err(TimedeltaError::InvalidFormat(s.to_string()));
}
Ok(total)
}
#[must_use]
pub fn unit_to_nanos(unit: &str) -> Option<i64> {
match unit.to_lowercase().as_str() {
"w" | "week" | "weeks" => Some(Self::NANOS_PER_WEEK),
"d" | "day" | "days" => Some(Self::NANOS_PER_DAY),
"h" | "hr" | "hour" | "hours" => Some(Self::NANOS_PER_HOUR),
"m" | "min" | "minute" | "minutes" | "t" => Some(Self::NANOS_PER_MIN),
"s" | "sec" | "second" | "seconds" => Some(Self::NANOS_PER_SEC),
"ms" | "milli" | "millis" | "millisecond" | "milliseconds" | "l" => {
Some(Self::NANOS_PER_MILLI)
}
"us" | "µs" | "micro" | "micros" | "microsecond" | "microseconds" | "u" => {
Some(Self::NANOS_PER_MICRO)
}
"ns" | "nano" | "nanos" | "nanosecond" | "nanoseconds" | "n" => Some(1),
"" => Some(Self::NANOS_PER_DAY),
_ => None,
}
}
pub fn components(nanos: i64) -> TimedeltaComponents {
if nanos == Self::NAT {
return TimedeltaComponents::default();
}
let days = nanos.div_euclid(Self::NANOS_PER_DAY);
let rem = nanos.rem_euclid(Self::NANOS_PER_DAY);
let hours = rem / Self::NANOS_PER_HOUR;
let rem = rem % Self::NANOS_PER_HOUR;
let minutes = rem / Self::NANOS_PER_MIN;
let rem = rem % Self::NANOS_PER_MIN;
let seconds = rem / Self::NANOS_PER_SEC;
let rem = rem % Self::NANOS_PER_SEC;
let milliseconds = rem / Self::NANOS_PER_MILLI;
let rem = rem % Self::NANOS_PER_MILLI;
let microseconds = rem / Self::NANOS_PER_MICRO;
let nanoseconds = rem % Self::NANOS_PER_MICRO;
TimedeltaComponents {
days,
hours,
minutes,
seconds,
milliseconds,
microseconds,
nanoseconds,
}
}
pub fn total_seconds(nanos: i64) -> f64 {
if nanos == Self::NAT {
f64::NAN
} else {
nanos as f64 / Self::NANOS_PER_SEC as f64
}
}
#[must_use]
pub fn as_unit(nanos: i64, unit: &str) -> f64 {
if nanos == Self::NAT {
return f64::NAN;
}
let nanos_f = nanos as f64;
match unit {
"ns" | "nanoseconds" => nanos_f,
"us" | "microseconds" => nanos_f / Self::NANOS_PER_MICRO as f64,
"ms" | "milliseconds" => nanos_f / Self::NANOS_PER_MILLI as f64,
"s" | "seconds" => nanos_f / Self::NANOS_PER_SEC as f64,
"m" | "minutes" => nanos_f / Self::NANOS_PER_MIN as f64,
"h" | "hours" => nanos_f / Self::NANOS_PER_HOUR as f64,
"D" | "days" => nanos_f / Self::NANOS_PER_DAY as f64,
_ => f64::NAN,
}
}
#[must_use]
pub fn days(nanos: i64) -> i64 {
if nanos == Self::NAT {
return 0; }
nanos.div_euclid(Self::NANOS_PER_DAY)
}
#[must_use]
pub fn seconds(nanos: i64) -> i64 {
if nanos == Self::NAT {
return 0;
}
nanos.rem_euclid(Self::NANOS_PER_DAY) / Self::NANOS_PER_SEC
}
#[must_use]
pub fn microseconds(nanos: i64) -> i64 {
if nanos == Self::NAT {
return 0;
}
nanos.rem_euclid(Self::NANOS_PER_SEC) / Self::NANOS_PER_MICRO
}
#[must_use]
pub fn nanoseconds(nanos: i64) -> i64 {
if nanos == Self::NAT {
return 0;
}
nanos.rem_euclid(Self::NANOS_PER_MICRO)
}
pub fn format(nanos: i64) -> String {
if nanos == Self::NAT {
return "NaT".to_string();
}
let days = nanos.div_euclid(Self::NANOS_PER_DAY);
let rem = nanos.rem_euclid(Self::NANOS_PER_DAY);
let hours = rem / Self::NANOS_PER_HOUR;
let minutes = (rem % Self::NANOS_PER_HOUR) / Self::NANOS_PER_MIN;
let seconds = (rem % Self::NANOS_PER_MIN) / Self::NANOS_PER_SEC;
let frac = rem % Self::NANOS_PER_SEC;
let time_part = format!("{hours:02}:{minutes:02}:{seconds:02}");
let sep = if days < 0 { "+" } else { "" };
if frac > 0 {
if frac % 1_000 == 0 {
format!("{days} days {sep}{time_part}.{:06}", frac / 1_000)
} else {
format!("{days} days {sep}{time_part}.{frac:09}")
}
} else {
format!("{days} days {sep}{time_part}")
}
}
pub fn from_unit(value: f64, unit: &str) -> Result<i64, TimedeltaError> {
let multiplier = Self::unit_to_nanos(unit)
.ok_or_else(|| TimedeltaError::InvalidFormat(unit.to_string()))?;
Ok((value * multiplier as f64).round() as i64)
}
#[must_use]
pub fn add(a: i64, b: i64) -> i64 {
if a == Self::NAT || b == Self::NAT {
return Self::NAT;
}
a.saturating_add(b)
}
#[must_use]
pub fn sub(a: i64, b: i64) -> i64 {
if a == Self::NAT || b == Self::NAT {
return Self::NAT;
}
a.saturating_sub(b)
}
#[must_use]
pub fn neg(a: i64) -> i64 {
if a == Self::NAT {
return Self::NAT;
}
a.saturating_neg()
}
#[must_use]
pub fn abs(a: i64) -> i64 {
if a == Self::NAT {
return Self::NAT;
}
a.saturating_abs()
}
#[must_use]
pub fn mul_scalar(a: i64, factor: i64) -> i64 {
if a == Self::NAT {
return Self::NAT;
}
a.saturating_mul(factor)
}
#[must_use]
pub fn div_scalar(a: i64, divisor: i64) -> i64 {
if a == Self::NAT || divisor == 0 {
return Self::NAT;
}
let q = a / divisor;
let r = a % divisor;
if r != 0 && (r < 0) != (divisor < 0) {
q - 1
} else {
q
}
}
#[must_use]
pub fn div_timedelta(a: i64, b: i64) -> f64 {
if a == Self::NAT || b == Self::NAT {
return f64::NAN;
}
(a as f64) / (b as f64)
}
#[must_use]
pub fn isoformat(nanos: i64) -> String {
if nanos == Self::NAT {
return "NaT".to_string();
}
let negative = nanos < 0;
let abs_nanos = nanos.saturating_abs();
let days = abs_nanos / Self::NANOS_PER_DAY;
let remaining = abs_nanos % Self::NANOS_PER_DAY;
let hours = remaining / Self::NANOS_PER_HOUR;
let remaining = remaining % Self::NANOS_PER_HOUR;
let minutes = remaining / Self::NANOS_PER_MIN;
let remaining = remaining % Self::NANOS_PER_MIN;
let seconds = remaining / Self::NANOS_PER_SEC;
let sub_sec_nanos = remaining % Self::NANOS_PER_SEC;
let mut result = String::new();
if negative {
result.push('-');
}
result.push_str(&format!("P{days}DT{hours}H{minutes}M"));
if sub_sec_nanos == 0 {
result.push_str(&format!("{seconds}S"));
} else {
let frac = format!("{:09}", sub_sec_nanos);
let trimmed = frac.trim_end_matches('0');
result.push_str(&format!("{seconds}.{trimmed}S"));
}
result
}
#[must_use]
pub fn floor(nanos: i64, freq: &str) -> i64 {
if nanos == Self::NAT {
return Self::NAT;
}
let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
return Self::NAT;
};
if unit_nanos == 0 {
return Self::NAT;
}
nanos.div_euclid(unit_nanos).saturating_mul(unit_nanos)
}
#[must_use]
pub fn ceil(nanos: i64, freq: &str) -> i64 {
if nanos == Self::NAT {
return Self::NAT;
}
let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
return Self::NAT;
};
if unit_nanos == 0 {
return Self::NAT;
}
let remainder = nanos.rem_euclid(unit_nanos);
if remainder == 0 {
nanos
} else {
nanos.saturating_add(unit_nanos - remainder)
}
}
#[must_use]
pub fn round(nanos: i64, freq: &str) -> i64 {
if nanos == Self::NAT {
return Self::NAT;
}
let Some(unit_nanos) = Self::unit_to_nanos(freq) else {
return Self::NAT;
};
if unit_nanos == 0 {
return Self::NAT;
}
let negative = nanos < 0;
let abs_nanos = nanos.saturating_abs();
let quotient = abs_nanos / unit_nanos;
let remainder = abs_nanos % unit_nanos;
let half = unit_nanos / 2;
let rounded = if remainder > half {
(quotient + 1) * unit_nanos
} else if remainder < half {
quotient * unit_nanos
} else {
if quotient % 2 == 0 {
quotient * unit_nanos
} else {
(quotient + 1) * unit_nanos
}
};
if negative { -rounded } else { rounded }
}
}
fn days_in_month(year: i64, month: u32) -> Option<u32> {
if !(1..=12).contains(&month) {
return None;
}
let is_leap = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
let days: [u32; 12] = [
31,
if is_leap { 29 } else { 28 },
31,
30,
31,
30,
31,
31,
30,
31,
30,
31,
];
Some(days[(month - 1) as usize])
}
fn iso_weeks_in_year(year: i64) -> i64 {
fn p(y: i64) -> i64 {
(y + y.div_euclid(4) - y.div_euclid(100) + y.div_euclid(400)).rem_euclid(7)
}
if p(year) == 4 || p(year - 1) == 3 {
53
} else {
52
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Timestamp {
pub nanos: i64,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub tz: Option<String>,
}
impl Timestamp {
pub const NAT: i64 = i64::MIN;
#[must_use]
pub const fn from_nanos(nanos: i64) -> Self {
Self { nanos, tz: None }
}
#[must_use]
pub fn from_nanos_tz(nanos: i64, tz_name: impl Into<String>) -> Self {
Self {
nanos,
tz: Some(tz_name.into()),
}
}
#[must_use]
pub fn now() -> Self {
use std::time::{SystemTime, UNIX_EPOCH};
let duration = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default();
let nanos = duration.as_nanos() as i64;
Self { nanos, tz: None }
}
#[must_use]
pub fn utcnow() -> Self {
Self::now()
}
#[must_use]
pub fn today() -> Self {
let now = Self::now();
now.normalize()
}
#[must_use]
pub const fn nat() -> Self {
Self {
nanos: Self::NAT,
tz: None,
}
}
#[must_use]
pub const fn is_nat(&self) -> bool {
self.nanos == Self::NAT
}
#[must_use]
pub const fn value(&self) -> i64 {
self.nanos
}
#[must_use]
pub const fn unit(&self) -> Option<&'static str> {
if self.is_nat() { None } else { Some("ns") }
}
#[must_use]
pub const fn resolution(&self) -> Option<&'static str> {
if self.is_nat() { None } else { Some("ns") }
}
#[must_use]
pub const fn asm8(&self) -> i64 {
self.value()
}
#[must_use]
pub const fn to_datetime64(&self) -> i64 {
self.value()
}
#[must_use]
pub const fn to_numpy(&self) -> i64 {
self.value()
}
pub fn timestamp(&self) -> Result<f64, TypeError> {
if self.is_nat() {
return Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
});
}
let seconds = self.nanos as f64 / 1_000_000_000.0;
let rounded = format!("{seconds:.6}").parse().unwrap_or(seconds);
Ok(rounded)
}
#[must_use]
pub fn add_timedelta(&self, td_nanos: i64) -> Self {
if self.is_nat() || td_nanos == Timedelta::NAT {
return Self::nat();
}
Self {
nanos: self.nanos.saturating_add(td_nanos),
tz: self.tz.clone(),
}
}
#[must_use]
pub fn sub_timedelta(&self, td_nanos: i64) -> Self {
if self.is_nat() || td_nanos == Timedelta::NAT {
return Self::nat();
}
Self {
nanos: self.nanos.saturating_sub(td_nanos),
tz: self.tz.clone(),
}
}
#[must_use]
pub fn sub_timestamp(&self, other: &Self) -> i64 {
if self.is_nat() || other.is_nat() {
return Timedelta::NAT;
}
self.nanos.saturating_sub(other.nanos)
}
#[must_use]
pub fn semantic_eq(&self, other: &Self) -> bool {
if self.is_nat() && other.is_nat() {
return true;
}
if self.is_nat() || other.is_nat() {
return false;
}
self.nanos == other.nanos && self.tz == other.tz
}
#[must_use]
pub fn floor_to(&self, unit_nanos: i64) -> Self {
if self.is_nat() || unit_nanos <= 0 {
return Self::nat();
}
let Some(nanos) = self.nanos.div_euclid(unit_nanos).checked_mul(unit_nanos) else {
return Self::nat();
};
Self {
nanos,
tz: self.tz.clone(),
}
}
#[must_use]
pub fn ceil_to(&self, unit_nanos: i64) -> Self {
if self.is_nat() || unit_nanos <= 0 {
return Self::nat();
}
let rem = self.nanos.rem_euclid(unit_nanos);
let nanos = if rem == 0 {
self.nanos
} else {
self.nanos.saturating_add(unit_nanos - rem)
};
Self {
nanos,
tz: self.tz.clone(),
}
}
#[must_use]
pub fn round_to(&self, unit_nanos: i64) -> Self {
if self.is_nat() || unit_nanos <= 0 {
return Self::nat();
}
let floor = self.nanos.div_euclid(unit_nanos);
let rem = self.nanos.rem_euclid(unit_nanos);
let half = unit_nanos / 2;
let chosen_floor = if rem < half {
floor
} else if rem > half {
floor + 1
} else if unit_nanos % 2 != 0 {
floor + 1
} else {
if floor % 2 == 0 { floor } else { floor + 1 }
};
Self {
nanos: chosen_floor.saturating_mul(unit_nanos),
tz: self.tz.clone(),
}
}
#[must_use]
pub fn floor_to_unit(&self, unit: &str) -> Self {
match Timedelta::unit_to_nanos(unit) {
Some(unit_nanos) => self.floor_to(unit_nanos),
None => Self::nat(),
}
}
#[must_use]
pub fn ceil_to_unit(&self, unit: &str) -> Self {
match Timedelta::unit_to_nanos(unit) {
Some(unit_nanos) => self.ceil_to(unit_nanos),
None => Self::nat(),
}
}
#[must_use]
pub fn round_to_unit(&self, unit: &str) -> Self {
match Timedelta::unit_to_nanos(unit) {
Some(unit_nanos) => self.round_to(unit_nanos),
None => Self::nat(),
}
}
#[must_use]
pub fn floor(&self, freq: &str) -> Self {
self.floor_to_unit(freq)
}
#[must_use]
pub fn ceil(&self, freq: &str) -> Self {
self.ceil_to_unit(freq)
}
#[must_use]
pub fn round(&self, freq: &str) -> Self {
self.round_to_unit(freq)
}
#[must_use]
pub fn year(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
let days = days_since_epoch + 719_468;
let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
let doe = days - era * 146_097;
let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
let y = yoe + era * 400;
let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
let mp = (5 * doy + 2) / 153;
let m = if mp < 10 { mp + 3 } else { mp - 9 };
Some(if m <= 2 { y + 1 } else { y })
}
#[must_use]
pub fn month(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
let days = days_since_epoch + 719_468;
let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
let doe = days - era * 146_097;
let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
let mp = (5 * doy + 2) / 153;
Some(if mp < 10 { mp + 3 } else { mp - 9 })
}
#[must_use]
pub fn day(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
let days = days_since_epoch + 719_468;
let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
let doe = days - era * 146_097;
let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
let mp = (5 * doy + 2) / 153;
Some(doy - (153 * mp + 2) / 5 + 1)
}
#[must_use]
pub fn hour(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let secs_of_day =
self.nanos.rem_euclid(Timedelta::NANOS_PER_DAY) / Timedelta::NANOS_PER_SEC;
Some(secs_of_day / 3600)
}
#[must_use]
pub fn minute(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let secs_of_day =
self.nanos.rem_euclid(Timedelta::NANOS_PER_DAY) / Timedelta::NANOS_PER_SEC;
Some((secs_of_day % 3600) / 60)
}
#[must_use]
pub fn second(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let secs_of_day =
self.nanos.rem_euclid(Timedelta::NANOS_PER_DAY) / Timedelta::NANOS_PER_SEC;
Some(secs_of_day % 60)
}
#[must_use]
pub fn microsecond(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let sub_nanos = self.nanos.rem_euclid(Timedelta::NANOS_PER_SEC) as u64;
Some((sub_nanos / 1000) as i64)
}
#[must_use]
pub fn nanosecond(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let sub_nanos = self.nanos.rem_euclid(Timedelta::NANOS_PER_SEC) as u64;
Some((sub_nanos % 1000) as i64)
}
#[must_use]
pub fn dayofweek(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
let dow = ((days_since_epoch + 3) % 7 + 7) % 7;
Some(dow)
}
#[must_use]
pub fn weekday(&self) -> Option<i64> {
self.dayofweek()
}
#[must_use]
pub fn day_of_week(&self) -> Option<i64> {
self.dayofweek()
}
#[must_use]
pub fn dayofyear(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let m = self.month()?;
let d = self.day()?;
let y = self.year()?;
let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
let days_before: [i64; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];
let base = days_before[(m - 1) as usize] + d;
if is_leap && m > 2 {
Some(base + 1)
} else {
Some(base)
}
}
#[must_use]
pub fn day_of_year(&self) -> Option<i64> {
self.dayofyear()
}
#[must_use]
pub fn toordinal(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let y = self.year()?;
let m = self.month()?;
let d = self.day()?;
let y_minus_1 = y - 1;
let mut ordinal = y_minus_1 * 365 + y_minus_1 / 4 - y_minus_1 / 100 + y_minus_1 / 400;
let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
let days_before: [i64; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];
ordinal += days_before[(m - 1) as usize];
if is_leap && m > 2 {
ordinal += 1;
}
ordinal += d;
Some(ordinal)
}
#[must_use]
pub fn fromordinal(ordinal: i64) -> Self {
if ordinal <= 0 {
return Self {
nanos: Self::NAT,
tz: None,
};
}
let days_since_epoch = ordinal - 719163;
match days_since_epoch.checked_mul(Timedelta::NANOS_PER_DAY) {
Some(nanos) => Self { nanos, tz: None },
None => Self::nat(),
}
}
#[must_use]
pub fn to_julian_date(&self) -> f64 {
if self.is_nat() {
return f64::NAN;
}
let ordinal = match self.toordinal() {
Some(o) => o,
None => return f64::NAN,
};
let h = self.hour().unwrap_or(0) as f64;
let m = self.minute().unwrap_or(0) as f64;
let s = self.second().unwrap_or(0) as f64;
let us = self.microsecond().unwrap_or(0) as f64;
let ns = self.nanosecond().unwrap_or(0) as f64;
let frac_day =
(h + m / 60.0 + s / 3600.0 + us / 3_600_000_000.0 + ns / 3_600_000_000_000.0) / 24.0;
1721424.5 + ordinal as f64 + frac_day
}
#[must_use]
pub fn quarter(&self) -> Option<i64> {
self.month().map(|m| (m - 1) / 3 + 1)
}
#[must_use]
pub fn weekofyear(&self) -> Option<i64> {
if self.is_nat() {
return None;
}
let doy = self.dayofyear()?;
let dow = self.dayofweek()?;
let year = self.year()?;
let iso_dow = if dow == 6 { 7 } else { dow + 1 };
let week = (doy - iso_dow + 10) / 7;
if week < 1 {
Some(iso_weeks_in_year(year - 1))
} else if week > iso_weeks_in_year(year) {
Some(1)
} else {
Some(week)
}
}
#[must_use]
pub fn week(&self) -> Option<i64> {
self.weekofyear()
}
#[must_use]
pub fn to_unit(&self, unit: &str) -> Option<i64> {
if self.is_nat() {
return None;
}
match unit {
"ns" | "nanosecond" | "nanoseconds" => Some(self.nanos),
"us" | "microsecond" | "microseconds" => Some(self.nanos / 1_000),
"ms" | "millisecond" | "milliseconds" => Some(self.nanos / 1_000_000),
"s" | "second" | "seconds" => Some(self.nanos / 1_000_000_000),
_ => None,
}
}
#[must_use]
pub fn is_leap_year(&self) -> Option<bool> {
self.year()
.map(|y| (y % 4 == 0 && y % 100 != 0) || y % 400 == 0)
}
#[must_use]
pub fn is_month_start(&self) -> Option<bool> {
self.day().map(|d| d == 1)
}
#[must_use]
pub fn is_month_end(&self) -> Option<bool> {
let y = self.year()?;
let m = self.month()?;
let d = self.day()?;
let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
let days_in_month: [i64; 12] = [
31,
if is_leap { 29 } else { 28 },
31,
30,
31,
30,
31,
31,
30,
31,
30,
31,
];
Some(d == days_in_month[(m - 1) as usize])
}
#[must_use]
pub fn is_quarter_start(&self) -> Option<bool> {
let m = self.month()?;
let d = self.day()?;
Some(d == 1 && (m == 1 || m == 4 || m == 7 || m == 10))
}
#[must_use]
pub fn is_quarter_end(&self) -> Option<bool> {
let m = self.month()?;
let d = self.day()?;
Some(
(m == 3 && d == 31)
|| (m == 6 && d == 30)
|| (m == 9 && d == 30)
|| (m == 12 && d == 31),
)
}
#[must_use]
pub fn is_year_start(&self) -> Option<bool> {
let m = self.month()?;
let d = self.day()?;
Some(m == 1 && d == 1)
}
#[must_use]
pub fn is_year_end(&self) -> Option<bool> {
let m = self.month()?;
let d = self.day()?;
Some(m == 12 && d == 31)
}
#[must_use]
pub fn days_in_month(&self) -> Option<i64> {
let y = self.year()?;
let m = self.month()?;
let is_leap = (y % 4 == 0 && y % 100 != 0) || y % 400 == 0;
let days: [i64; 12] = [
31,
if is_leap { 29 } else { 28 },
31,
30,
31,
30,
31,
31,
30,
31,
30,
31,
];
Some(days[(m - 1) as usize])
}
#[must_use]
pub fn daysinmonth(&self) -> Option<i64> {
self.days_in_month()
}
#[must_use]
pub fn normalize(&self) -> Self {
self.floor_to_unit("D")
}
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn replace(
&self,
year: Option<i64>,
month: Option<i64>,
day: Option<i64>,
hour: Option<i64>,
minute: Option<i64>,
second: Option<i64>,
microsecond: Option<i64>,
nanosecond: Option<i64>,
) -> Self {
if self.is_nat() {
return self.clone();
}
let cur_year = self.year().unwrap_or(1970);
let cur_month = self.month().unwrap_or(1);
let cur_day = self.day().unwrap_or(1);
let cur_hour = self.hour().unwrap_or(0);
let cur_minute = self.minute().unwrap_or(0);
let cur_second = self.second().unwrap_or(0);
let cur_micro = self.microsecond().unwrap_or(0);
let cur_nano = self.nanosecond().unwrap_or(0);
let y = year.unwrap_or(cur_year);
let mo = month.unwrap_or(cur_month);
let d = day.unwrap_or(cur_day);
let h = hour.unwrap_or(cur_hour);
let mi = minute.unwrap_or(cur_minute);
let s = second.unwrap_or(cur_second);
let us = microsecond.unwrap_or(cur_micro);
let ns = nanosecond.unwrap_or(cur_nano);
if !(1..=12).contains(&mo)
|| !(1..=days_in_month(y, mo as u32).unwrap_or(0) as i64).contains(&d)
|| !(0..=23).contains(&h)
|| !(0..=59).contains(&mi)
|| !(0..=59).contains(&s)
|| !(0..=999_999).contains(&us)
|| !(0..=999).contains(&ns)
{
return Self::nat();
}
let days_from_epoch = Self::days_from_ymd(y, mo, d);
let secs = h * 3600 + mi * 60 + s;
let total_nanos = days_from_epoch * Timedelta::NANOS_PER_DAY
+ secs * Timedelta::NANOS_PER_SEC
+ us * Timedelta::NANOS_PER_MICRO
+ ns;
Self {
nanos: total_nanos,
tz: self.tz.clone(),
}
}
fn days_from_ymd(year: i64, month: i64, day: i64) -> i64 {
let y = if month <= 2 { year - 1 } else { year };
let era = if y >= 0 { y } else { y - 399 } / 400;
let yoe = y - era * 400;
let doy = (153 * (if month > 2 { month - 3 } else { month + 9 }) + 2) / 5 + day - 1;
let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
era * 146097 + doe - 719468
}
#[must_use]
pub fn isoformat(&self) -> String {
if self.is_nat() {
return "NaT".to_string();
}
let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
let nanos_of_day = self.nanos.rem_euclid(Timedelta::NANOS_PER_DAY);
let secs_of_day = nanos_of_day / Timedelta::NANOS_PER_SEC;
let sub_nanos = nanos_of_day.rem_euclid(Timedelta::NANOS_PER_SEC) as u64;
let days = days_since_epoch + 719_468;
let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
let doe = days - era * 146_097;
let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
let y = yoe + era * 400;
let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
let mp = (5 * doy + 2) / 153;
let d = doy - (153 * mp + 2) / 5 + 1;
let m = if mp < 10 { mp + 3 } else { mp - 9 };
let year = if m <= 2 { y + 1 } else { y };
let hour = secs_of_day / 3600;
let minute = (secs_of_day % 3600) / 60;
let second = secs_of_day % 60;
let base = if sub_nanos == 0 {
format!("{year:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}")
} else if sub_nanos.is_multiple_of(1_000) {
format!(
"{year:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}.{:06}",
sub_nanos / 1_000
)
} else {
format!("{year:04}-{m:02}-{d:02}T{hour:02}:{minute:02}:{second:02}.{sub_nanos:09}")
};
match &self.tz {
Some(tz) if tz == "UTC" => format!("{base}+00:00"),
Some(tz) => format!("{base}[{tz}]"),
None => base,
}
}
#[must_use]
pub fn to_iso8601(&self) -> String {
self.isoformat()
}
pub fn parse(s: &str) -> Result<Self, TypeError> {
let s = s.trim();
if s.eq_ignore_ascii_case("nat") {
return Ok(Self::nat());
}
let Some((datetime_part, tz)) = Self::split_timezone(s) else {
return Err(TypeError::ValueNotParseable {
value: s.to_string(),
target: "Timestamp".to_string(),
});
};
let (date_part, time_part) = if datetime_part.contains('T') {
datetime_part
.split_once('T')
.ok_or_else(|| TypeError::ValueNotParseable {
value: s.to_string(),
target: "Timestamp".to_string(),
})?
} else if datetime_part.contains(' ')
&& datetime_part.chars().filter(|&c| c == ' ').count() == 1
{
datetime_part
.split_once(' ')
.ok_or_else(|| TypeError::ValueNotParseable {
value: s.to_string(),
target: "Timestamp".to_string(),
})?
} else {
(datetime_part, "00:00:00")
};
let (year, month, day) =
Self::parse_date(date_part).ok_or_else(|| TypeError::ValueNotParseable {
value: s.to_string(),
target: "Timestamp".to_string(),
})?;
let (hour, minute, second, nanos) =
Self::parse_time(time_part).ok_or_else(|| TypeError::ValueNotParseable {
value: s.to_string(),
target: "Timestamp".to_string(),
})?;
let total_nanos = Self::ymd_hms_to_nanos(year, month, day, hour, minute, second, nanos);
Ok(if let Some(tz_name) = tz {
Self::from_nanos_tz(total_nanos, tz_name)
} else {
Self::from_nanos(total_nanos)
})
}
fn split_timezone(s: &str) -> Option<(&str, Option<String>)> {
if let Some(stripped) = s.strip_suffix('Z') {
Some((stripped, Some("UTC".to_string())))
} else if let Some(idx) = s.rfind('+') {
if idx > 10 && Self::is_timezone_offset(&s[idx..]) {
Some((&s[..idx], Some(s[idx..].to_string())))
} else if idx > 10 {
None
} else {
Some((s, None))
}
} else if let Some(idx) = s.rfind('-') {
if idx > 10 && Self::is_timezone_offset(&s[idx..]) {
Some((&s[..idx], Some(s[idx..].to_string())))
} else if idx > 10 {
None
} else {
Some((s, None))
}
} else {
Some((s, None))
}
}
fn is_timezone_offset(s: &str) -> bool {
let bytes = s.as_bytes();
if bytes.len() != 6 || !matches!(bytes[0], b'+' | b'-') || bytes[3] != b':' {
return false;
}
if !bytes[1..3].iter().all(u8::is_ascii_digit)
|| !bytes[4..6].iter().all(u8::is_ascii_digit)
{
return false;
}
let hour = u32::from(bytes[1] - b'0') * 10 + u32::from(bytes[2] - b'0');
let minute = u32::from(bytes[4] - b'0') * 10 + u32::from(bytes[5] - b'0');
hour <= 23 && minute <= 59
}
fn parse_date(s: &str) -> Option<(i64, u32, u32)> {
let parts: Vec<&str> = s.split('-').collect();
if parts.len() != 3 {
return None;
}
let year: i64 = parts[0].parse().ok()?;
let month: u32 = parts[1].parse().ok()?;
let day: u32 = parts[2].parse().ok()?;
if !(1..=days_in_month(year, month)?).contains(&day) {
return None;
}
Some((year, month, day))
}
fn parse_time(s: &str) -> Option<(u32, u32, u32, u64)> {
let (time_str, frac_str) = match s.split_once('.') {
Some((time, frac)) => (time, Some(frac)),
None => (s, None),
};
let parts: Vec<&str> = time_str.split(':').collect();
if parts.is_empty() || parts.len() > 3 {
return None;
}
let hour: u32 = parts.first().and_then(|p| p.parse().ok())?;
let minute: u32 = parts.get(1).and_then(|p| p.parse().ok()).unwrap_or(0);
let second: u32 = parts.get(2).and_then(|p| p.parse().ok()).unwrap_or(0);
if hour > 23 || minute > 59 || second > 59 {
return None;
}
let nanos = match frac_str {
None => 0,
Some(frac) => {
if frac.is_empty() || !frac.bytes().all(|b| b.is_ascii_digit()) {
return None;
}
let truncated = &frac[..frac.len().min(9)];
let padded = format!("{truncated:0<9}");
padded.parse::<u64>().ok()?
}
};
Some((hour, minute, second, nanos))
}
fn ymd_hms_to_nanos(
year: i64,
month: u32,
day: u32,
hour: u32,
minute: u32,
second: u32,
sub_nanos: u64,
) -> i64 {
let m = month as i64;
let d = day as i64;
let y = if m <= 2 { year - 1 } else { year };
let era = if y >= 0 { y } else { y - 399 } / 400;
let yoe = y - era * 400;
let doy = (153 * (if m > 2 { m - 3 } else { m + 9 }) + 2) / 5 + d - 1;
let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
let days_since_epoch = era * 146_097 + doe - 719_468;
let total_seconds = days_since_epoch * 86400
+ (hour as i64) * 3600
+ (minute as i64) * 60
+ (second as i64);
total_seconds * Timedelta::NANOS_PER_SEC + sub_nanos as i64
}
#[must_use]
pub fn strftime(&self, format: &str) -> String {
if self.is_nat() {
return "NaT".to_string();
}
let total_secs = self.nanos / Timedelta::NANOS_PER_SEC;
let sub_nanos = self.nanos.rem_euclid(Timedelta::NANOS_PER_SEC) as u64;
let days_since_epoch = total_secs / 86400;
let secs_of_day = (total_secs % 86400 + 86400) % 86400;
let days = days_since_epoch + 719_468;
let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
let doe = days - era * 146_097;
let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
let y = yoe + era * 400;
let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
let mp = (5 * doy + 2) / 153;
let d = doy - (153 * mp + 2) / 5 + 1;
let m = if mp < 10 { mp + 3 } else { mp - 9 };
let year = if m <= 2 { y + 1 } else { y };
let hour = secs_of_day / 3600;
let minute = (secs_of_day % 3600) / 60;
let second = secs_of_day % 60;
let micros = sub_nanos / 1000;
format
.replace("%Y", &format!("{year:04}"))
.replace("%m", &format!("{m:02}"))
.replace("%d", &format!("{d:02}"))
.replace("%H", &format!("{hour:02}"))
.replace("%M", &format!("{minute:02}"))
.replace("%S", &format!("{second:02}"))
.replace("%f", &format!("{micros:06}"))
}
#[must_use]
pub fn day_name(&self) -> String {
const NAMES: [&str; 7] = [
"Thursday",
"Friday",
"Saturday",
"Sunday",
"Monday",
"Tuesday",
"Wednesday",
];
if self.is_nat() {
return "NaT".to_string();
}
let days_since_epoch = self.nanos / Timedelta::NANOS_PER_DAY;
let dow = ((days_since_epoch % 7) + 7) % 7;
NAMES[dow as usize].to_string()
}
#[must_use]
pub fn month_name(&self) -> String {
const NAMES: [&str; 12] = [
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December",
];
if self.is_nat() {
return "NaT".to_string();
}
let days_since_epoch = self.nanos.div_euclid(Timedelta::NANOS_PER_DAY);
let days = days_since_epoch + 719_468;
let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
let doe = days - era * 146_097;
let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
let mp = (5 * doy + 2) / 153;
let m = if mp < 10 { mp + 3 } else { mp - 9 };
NAMES[(m - 1) as usize].to_string()
}
#[must_use]
pub fn tz_localize(&self, tz: Option<&str>) -> Self {
if self.is_nat() {
return Self::nat();
}
Self {
nanos: self.nanos,
tz: tz.map(String::from),
}
}
#[must_use]
pub fn tz_convert(&self, tz: &str) -> Self {
if self.is_nat() {
return Self::nat();
}
Self {
nanos: self.nanos,
tz: Some(tz.to_string()),
}
}
#[must_use]
pub fn fromtimestamp(ts: f64, tz: Option<&str>) -> Self {
if ts.is_nan() || ts.is_infinite() {
return Self::nat();
}
let nanos_f64 = ts * 1_000_000_000.0;
const MAX_NANOS: f64 = i64::MAX as f64;
const MIN_NANOS: f64 = i64::MIN as f64;
if !(MIN_NANOS..=MAX_NANOS).contains(&nanos_f64) {
return Self::nat();
}
Self {
nanos: nanos_f64 as i64,
tz: tz.map(String::from),
}
}
#[must_use]
pub fn from_millis(ms: i64, tz: Option<&str>) -> Self {
Self {
nanos: ms.saturating_mul(1_000_000),
tz: tz.map(String::from),
}
}
#[must_use]
pub fn from_micros(us: i64, tz: Option<&str>) -> Self {
Self {
nanos: us.saturating_mul(1_000),
tz: tz.map(String::from),
}
}
#[must_use]
pub fn tzinfo(&self) -> Option<&str> {
self.tz.as_deref()
}
#[must_use]
pub fn tzname(&self) -> Option<&str> {
self.tzinfo()
}
}
impl std::fmt::Display for Timestamp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.is_nat() {
return f.write_str("NaT");
}
match &self.tz {
Some(tz) => write!(f, "Timestamp[{}, {}]", self.nanos, tz),
None => write!(f, "Timestamp[{}, UTC]", self.nanos),
}
}
}
impl PartialOrd for Timestamp {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
if self.is_nat() || other.is_nat() {
return None;
}
Some(self.nanos.cmp(&other.nanos))
}
}
pub fn isna(values: &[Scalar]) -> Vec<bool> {
values.iter().map(Scalar::is_missing).collect()
}
pub fn isnull(values: &[Scalar]) -> Vec<bool> {
isna(values)
}
pub fn notna(values: &[Scalar]) -> Vec<bool> {
values.iter().map(|v| !v.is_missing()).collect()
}
pub fn notnull(values: &[Scalar]) -> Vec<bool> {
notna(values)
}
pub fn count_na(values: &[Scalar]) -> usize {
values.iter().filter(|v| v.is_missing()).count()
}
pub fn fill_na(values: &[Scalar], fill: &Scalar) -> Vec<Scalar> {
values
.iter()
.map(|v| {
if v.is_missing() {
fill.clone()
} else {
v.clone()
}
})
.collect()
}
pub fn dropna(values: &[Scalar]) -> Vec<Scalar> {
values.iter().filter(|v| !v.is_missing()).cloned().collect()
}
fn collect_finite(values: &[Scalar]) -> Vec<f64> {
values
.iter()
.filter(|v| !v.is_missing())
.filter_map(|v| v.to_f64().ok())
.collect()
}
fn collect_timedelta_ns(values: &[Scalar]) -> Option<(i128, usize)> {
let mut sum: i128 = 0;
let mut count: usize = 0;
let mut saw_timedelta = false;
for v in values {
if v.is_missing() {
continue;
}
match v {
Scalar::Timedelta64(ns) => {
saw_timedelta = true;
sum += i128::from(*ns);
count += 1;
}
_ => return None,
}
}
if saw_timedelta {
Some((sum, count))
} else {
None
}
}
pub fn nansum(values: &[Scalar]) -> Scalar {
if let Some((sum, _)) = collect_timedelta_ns(values) {
let clamped = sum.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
return Scalar::Timedelta64(clamped as i64);
}
let mut sum = 0.0_f64;
for v in values {
if v.is_missing() {
continue;
}
if let Ok(x) = v.to_f64() {
sum += x;
}
}
Scalar::Float64(sum)
}
pub fn nanmean(values: &[Scalar]) -> Scalar {
if let Some((sum, count)) = collect_timedelta_ns(values) {
if count == 0 {
return Scalar::Timedelta64(Timedelta::NAT);
}
let mean = sum / count as i128;
let clamped = mean.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
return Scalar::Timedelta64(clamped as i64);
}
let mut sum = 0.0_f64;
let mut count = 0usize;
for v in values {
if v.is_missing() {
continue;
}
if let Ok(x) = v.to_f64() {
sum += x;
count += 1;
}
}
if count == 0 {
return Scalar::Null(NullKind::NaN);
}
Scalar::Float64(sum / count as f64)
}
pub fn nanany(values: &[Scalar]) -> Scalar {
for v in values {
if v.is_missing() {
continue;
}
match v {
Scalar::Bool(flag) if *flag => return Scalar::Bool(true),
Scalar::Int64(val) if *val != 0 => return Scalar::Bool(true),
Scalar::Float64(val) if !val.is_nan() && *val != 0.0 => return Scalar::Bool(true),
Scalar::Utf8(val) if !val.is_empty() => return Scalar::Bool(true),
Scalar::Timedelta64(ns) if *ns != 0 => return Scalar::Bool(true),
_ => continue,
}
}
Scalar::Bool(false)
}
pub fn nanall(values: &[Scalar]) -> Scalar {
for v in values {
if v.is_missing() {
continue;
}
match v {
Scalar::Bool(flag) if !*flag => return Scalar::Bool(false),
Scalar::Int64(val) if *val == 0 => return Scalar::Bool(false),
Scalar::Float64(val) if val.is_nan() || *val == 0.0 => return Scalar::Bool(false),
Scalar::Utf8(val) if val.is_empty() => return Scalar::Bool(false),
Scalar::Timedelta64(ns) if *ns == 0 => return Scalar::Bool(false),
_ => continue,
}
}
Scalar::Bool(true)
}
pub fn nancount(values: &[Scalar]) -> Scalar {
let n = values.iter().filter(|v| !v.is_missing()).count();
Scalar::Int64(n as i64)
}
pub fn nanmin(values: &[Scalar]) -> Scalar {
let mut min: Option<&Scalar> = None;
for v in values {
if v.is_missing() {
continue;
}
match (min, v) {
(None, _) => min = Some(v),
(Some(Scalar::Int64(a)), Scalar::Int64(b)) => {
if b < a {
min = Some(v)
}
}
(Some(Scalar::Float64(a)), Scalar::Float64(b)) => {
if *b < *a {
min = Some(v)
}
}
(Some(Scalar::Utf8(a)), Scalar::Utf8(b)) => {
if b < a {
min = Some(v)
}
}
(Some(Scalar::Bool(a)), Scalar::Bool(b)) => {
if b < a {
min = Some(v)
}
}
(Some(Scalar::Timedelta64(a)), Scalar::Timedelta64(b)) => {
if b < a {
min = Some(v)
}
}
(Some(a), b) => match (a.to_f64(), b.to_f64()) {
(Ok(af), Ok(bf)) if bf < af => min = Some(v),
(Ok(_), Ok(_)) => {}
_ => return Scalar::Null(NullKind::NaN),
},
}
}
match min {
Some(v) => v.clone(),
None => Scalar::Null(NullKind::NaN),
}
}
pub fn nanmax(values: &[Scalar]) -> Scalar {
let mut max: Option<&Scalar> = None;
for v in values {
if v.is_missing() {
continue;
}
match (max, v) {
(None, _) => max = Some(v),
(Some(Scalar::Int64(a)), Scalar::Int64(b)) => {
if b > a {
max = Some(v)
}
}
(Some(Scalar::Float64(a)), Scalar::Float64(b)) => {
if *b > *a {
max = Some(v)
}
}
(Some(Scalar::Utf8(a)), Scalar::Utf8(b)) => {
if b > a {
max = Some(v)
}
}
(Some(Scalar::Bool(a)), Scalar::Bool(b)) => {
if b > a {
max = Some(v)
}
}
(Some(Scalar::Timedelta64(a)), Scalar::Timedelta64(b)) => {
if b > a {
max = Some(v)
}
}
(Some(a), b) => match (a.to_f64(), b.to_f64()) {
(Ok(af), Ok(bf)) if bf > af => max = Some(v),
(Ok(_), Ok(_)) => {}
_ => return Scalar::Null(NullKind::NaN),
},
}
}
match max {
Some(v) => v.clone(),
None => Scalar::Null(NullKind::NaN),
}
}
fn collect_timedelta_ns_f64(values: &[Scalar]) -> Option<Vec<f64>> {
let mut out = Vec::with_capacity(values.len());
let mut saw_td = false;
for v in values {
if v.is_missing() {
continue;
}
match v {
Scalar::Timedelta64(ns) => {
saw_td = true;
out.push(*ns as f64);
}
_ => return None,
}
}
if saw_td { Some(out) } else { None }
}
fn float_ns_to_timedelta(value: f64) -> Scalar {
if !value.is_finite() {
return Scalar::Timedelta64(Timedelta::NAT);
}
let clamped = value.clamp(i64::MIN as f64, i64::MAX as f64);
Scalar::Timedelta64(clamped as i64)
}
pub fn nanmedian(values: &[Scalar]) -> Scalar {
if let Some(mut td) = collect_timedelta_ns_f64(values) {
if td.is_empty() {
return Scalar::Timedelta64(Timedelta::NAT);
}
let n = td.len();
let mid = n / 2;
let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
let (left, mid_ref, _right) = td.select_nth_unstable_by(mid, cmp);
let mid_val = *mid_ref;
let median_ns = if n.is_multiple_of(2) {
let lower = left.iter().copied().fold(f64::NEG_INFINITY, f64::max);
(lower + mid_val) / 2.0
} else {
mid_val
};
return float_ns_to_timedelta(median_ns);
}
let mut nums = collect_finite(values);
if nums.is_empty() {
return Scalar::Null(NullKind::NaN);
}
let n = nums.len();
let mid = n / 2;
let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
let (left, mid_ref, _right) = nums.select_nth_unstable_by(mid, cmp);
let mid_val = *mid_ref;
if n.is_multiple_of(2) {
let lower = left.iter().copied().fold(f64::NEG_INFINITY, f64::max);
Scalar::Float64((lower + mid_val) / 2.0)
} else {
Scalar::Float64(mid_val)
}
}
pub fn nanvar(values: &[Scalar], ddof: usize) -> Scalar {
if let Some(td) = collect_timedelta_ns_f64(values) {
if td.len() <= ddof {
return Scalar::Timedelta64(Timedelta::NAT);
}
let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
return float_ns_to_timedelta(sum_sq / (td.len() - ddof) as f64);
}
let nums = collect_finite(values);
if nums.len() <= ddof {
return Scalar::Null(NullKind::NaN);
}
let mean: f64 = nums.iter().sum::<f64>() / nums.len() as f64;
let sum_sq: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
Scalar::Float64(sum_sq / (nums.len() - ddof) as f64)
}
pub fn nanstd(values: &[Scalar], ddof: usize) -> Scalar {
if let Some(td) = collect_timedelta_ns_f64(values) {
if td.len() <= ddof {
return Scalar::Timedelta64(Timedelta::NAT);
}
let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
let var = sum_sq / (td.len() - ddof) as f64;
return float_ns_to_timedelta(var.sqrt());
}
match nanvar(values, ddof) {
Scalar::Float64(v) => Scalar::Float64(v.sqrt()),
other => other,
}
}
pub fn nansem(values: &[Scalar], ddof: usize) -> Scalar {
if let Some(td) = collect_timedelta_ns_f64(values) {
if td.len() <= ddof {
return Scalar::Timedelta64(Timedelta::NAT);
}
let mean: f64 = td.iter().sum::<f64>() / td.len() as f64;
let sum_sq: f64 = td.iter().map(|x| (x - mean).powi(2)).sum();
let var = sum_sq / (td.len() - ddof) as f64;
let std = var.sqrt();
return float_ns_to_timedelta(std / (td.len() as f64).sqrt());
}
let nums = collect_finite(values);
if nums.len() <= ddof {
return Scalar::Null(NullKind::NaN);
}
match nanstd(values, ddof) {
Scalar::Float64(s) => Scalar::Float64(s / (nums.len() as f64).sqrt()),
other => other,
}
}
pub fn nanptp(values: &[Scalar]) -> Scalar {
if let Some(td) = collect_timedelta_ns_f64(values) {
if td.is_empty() {
return Scalar::Timedelta64(Timedelta::NAT);
}
let (mut lo, mut hi) = (f64::INFINITY, f64::NEG_INFINITY);
for x in &td {
if *x < lo {
lo = *x;
}
if *x > hi {
hi = *x;
}
}
return float_ns_to_timedelta(hi - lo);
}
let (mut lo, mut hi) = (f64::INFINITY, f64::NEG_INFINITY);
let mut seen = false;
for v in values {
if v.is_missing() {
continue;
}
if let Ok(x) = v.to_f64() {
seen = true;
if x < lo {
lo = x;
}
if x > hi {
hi = x;
}
}
}
if !seen {
return Scalar::Null(NullKind::NaN);
}
Scalar::Float64(hi - lo)
}
pub fn nanskew(values: &[Scalar]) -> Scalar {
let nums = collect_finite(values);
let n = nums.len() as f64;
if n < 3.0 {
return Scalar::Null(NullKind::NaN);
}
let mean = nums.iter().sum::<f64>() / n;
let m2: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
let m3: f64 = nums.iter().map(|x| (x - mean).powi(3)).sum();
let s2 = m2 / (n - 1.0);
if s2 == 0.0 {
return Scalar::Float64(0.0);
}
let s3 = s2.powf(1.5);
Scalar::Float64((n / ((n - 1.0) * (n - 2.0))) * (m3 / s3))
}
pub fn nankurt(values: &[Scalar]) -> Scalar {
let nums = collect_finite(values);
let n = nums.len() as f64;
if n < 4.0 {
return Scalar::Null(NullKind::NaN);
}
let mean = nums.iter().sum::<f64>() / n;
let m2: f64 = nums.iter().map(|x| (x - mean).powi(2)).sum();
let m4: f64 = nums.iter().map(|x| (x - mean).powi(4)).sum();
let s2 = m2 / (n - 1.0);
if s2 == 0.0 {
return Scalar::Float64(0.0);
}
let adj = (n * (n + 1.0)) / ((n - 1.0) * (n - 2.0) * (n - 3.0));
let sub = (3.0 * (n - 1.0).powi(2)) / ((n - 2.0) * (n - 3.0));
Scalar::Float64(adj * (m4 / (s2 * s2)) - sub)
}
pub fn nanprod(values: &[Scalar]) -> Scalar {
if is_timedelta_input(values) {
return Scalar::Null(NullKind::NaN);
}
let mut prod = 1.0_f64;
for v in values {
if v.is_missing() {
continue;
}
if let Ok(x) = v.to_f64() {
prod *= x;
}
}
Scalar::Float64(prod)
}
fn is_timedelta_input(values: &[Scalar]) -> bool {
let mut saw_td = false;
for v in values {
if v.is_missing() {
continue;
}
match v {
Scalar::Timedelta64(_) => saw_td = true,
_ => return false,
}
}
saw_td
}
fn timedelta_cumulative<F>(values: &[Scalar], init: i128, mut step: F) -> Vec<Scalar>
where
F: FnMut(i128, i128) -> i128,
{
let mut out = Vec::with_capacity(values.len());
let mut running: i128 = init;
for v in values {
if v.is_missing() {
out.push(Scalar::Null(NullKind::NaT));
continue;
}
if let Scalar::Timedelta64(ns) = v {
running = step(running, i128::from(*ns));
let clamped = running.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
out.push(Scalar::Timedelta64(clamped as i64));
} else {
out.push(Scalar::Null(NullKind::NaT));
}
}
out
}
fn timedelta_cumulative_extrema<F>(values: &[Scalar], sentinel: i64, mut step: F) -> Vec<Scalar>
where
F: FnMut(i64, i64) -> i64,
{
let mut out = Vec::with_capacity(values.len());
let mut running: Option<i64> = None;
for v in values {
if v.is_missing() {
out.push(Scalar::Null(NullKind::NaT));
continue;
}
if let Scalar::Timedelta64(ns) = v {
let new_val = match running {
Some(prev) => step(prev, *ns),
None => *ns,
};
running = Some(new_val);
out.push(Scalar::Timedelta64(new_val));
} else {
out.push(Scalar::Null(NullKind::NaT));
}
}
let _ = sentinel; out
}
pub fn nancumsum(values: &[Scalar]) -> Vec<Scalar> {
if is_timedelta_input(values) {
return timedelta_cumulative(values, 0_i128, |acc, x| acc.saturating_add(x));
}
let mut out = Vec::with_capacity(values.len());
let mut running = 0.0_f64;
for v in values {
if v.is_missing() {
out.push(Scalar::Null(NullKind::NaN));
continue;
}
match v.to_f64() {
Ok(x) if !x.is_nan() => {
running += x;
out.push(Scalar::Float64(running));
}
_ => out.push(Scalar::Null(NullKind::NaN)),
}
}
out
}
pub fn nancumprod(values: &[Scalar]) -> Vec<Scalar> {
let mut out = Vec::with_capacity(values.len());
let mut running = 1.0_f64;
for v in values {
if v.is_missing() {
out.push(Scalar::Null(NullKind::NaN));
continue;
}
match v.to_f64() {
Ok(x) if !x.is_nan() => {
running *= x;
out.push(Scalar::Float64(running));
}
_ => out.push(Scalar::Null(NullKind::NaN)),
}
}
out
}
pub fn nancummax(values: &[Scalar]) -> Vec<Scalar> {
if is_timedelta_input(values) {
return timedelta_cumulative_extrema(values, i64::MAX, |acc, x| acc.max(x));
}
let mut out = Vec::with_capacity(values.len());
let mut running: Option<f64> = None;
for v in values {
if v.is_missing() {
out.push(Scalar::Null(NullKind::NaN));
continue;
}
match v.to_f64() {
Ok(x) if !x.is_nan() => {
let new_val = match running {
Some(prev) => prev.max(x),
None => x,
};
running = Some(new_val);
out.push(Scalar::Float64(new_val));
}
_ => out.push(Scalar::Null(NullKind::NaN)),
}
}
out
}
pub fn nancummin(values: &[Scalar]) -> Vec<Scalar> {
if is_timedelta_input(values) {
return timedelta_cumulative_extrema(values, i64::MIN, |acc, x| acc.min(x));
}
let mut out = Vec::with_capacity(values.len());
let mut running: Option<f64> = None;
for v in values {
if v.is_missing() {
out.push(Scalar::Null(NullKind::NaN));
continue;
}
match v.to_f64() {
Ok(x) if !x.is_nan() => {
let new_val = match running {
Some(prev) => prev.min(x),
None => x,
};
running = Some(new_val);
out.push(Scalar::Float64(new_val));
}
_ => out.push(Scalar::Null(NullKind::NaN)),
}
}
out
}
pub fn nanquantile(values: &[Scalar], q: f64) -> Scalar {
if !(0.0..=1.0).contains(&q) {
return Scalar::Null(NullKind::NaN);
}
if let Some(mut td) = collect_timedelta_ns_f64(values) {
if td.is_empty() {
return Scalar::Timedelta64(Timedelta::NAT);
}
let n = td.len();
if n == 1 {
return float_ns_to_timedelta(td[0]);
}
let pos = q * (n - 1) as f64;
let lo = pos.floor() as usize;
let hi = pos.ceil() as usize;
let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
let (_left, lo_ref, right) = td.select_nth_unstable_by(lo, cmp);
let lo_val = *lo_ref;
let ns = if lo == hi {
lo_val
} else {
let hi_val = right.iter().copied().fold(f64::INFINITY, f64::min);
let weight = pos - lo as f64;
lo_val + (hi_val - lo_val) * weight
};
return float_ns_to_timedelta(ns);
}
let mut nums = collect_finite(values);
if nums.is_empty() {
return Scalar::Null(NullKind::NaN);
}
let n = nums.len();
if n == 1 {
return Scalar::Float64(nums[0]);
}
let pos = q * (n - 1) as f64;
let lo = pos.floor() as usize;
let hi = pos.ceil() as usize;
let cmp = |a: &f64, b: &f64| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal);
let (_left, lo_ref, right) = nums.select_nth_unstable_by(lo, cmp);
let lo_val = *lo_ref;
if lo == hi {
return Scalar::Float64(lo_val);
}
let hi_val = right.iter().copied().fold(f64::INFINITY, f64::min);
let weight = pos - lo as f64;
Scalar::Float64(lo_val + (hi_val - lo_val) * weight)
}
pub fn nanargmax(values: &[Scalar]) -> Option<usize> {
if is_timedelta_input(values) {
let mut best: Option<(usize, i64)> = None;
for (i, v) in values.iter().enumerate() {
if v.is_missing() {
continue;
}
if let Scalar::Timedelta64(ns) = v {
match best {
None => best = Some((i, *ns)),
Some((_, cur)) if *ns > cur => best = Some((i, *ns)),
_ => {}
}
}
}
return best.map(|(i, _)| i);
}
let mut best: Option<(usize, f64)> = None;
for (i, v) in values.iter().enumerate() {
if v.is_missing() {
continue;
}
if let Ok(x) = v.to_f64() {
if x.is_nan() {
continue;
}
match best {
None => best = Some((i, x)),
Some((_, cur)) if x > cur => best = Some((i, x)),
_ => {}
}
}
}
best.map(|(i, _)| i)
}
pub fn nanargmin(values: &[Scalar]) -> Option<usize> {
if is_timedelta_input(values) {
let mut best: Option<(usize, i64)> = None;
for (i, v) in values.iter().enumerate() {
if v.is_missing() {
continue;
}
if let Scalar::Timedelta64(ns) = v {
match best {
None => best = Some((i, *ns)),
Some((_, cur)) if *ns < cur => best = Some((i, *ns)),
_ => {}
}
}
}
return best.map(|(i, _)| i);
}
let mut best: Option<(usize, f64)> = None;
for (i, v) in values.iter().enumerate() {
if v.is_missing() {
continue;
}
if let Ok(x) = v.to_f64() {
if x.is_nan() {
continue;
}
match best {
None => best = Some((i, x)),
Some((_, cur)) if x < cur => best = Some((i, x)),
_ => {}
}
}
}
best.map(|(i, _)| i)
}
pub fn nannunique(values: &[Scalar]) -> Scalar {
use rustc_hash::FxHashSet;
#[derive(Hash, PartialEq, Eq)]
enum ScalarKey<'a> {
Bool(bool),
Int64(i64),
FloatBits(u64),
Utf8(&'a str),
Timedelta64(i64),
Datetime64(i64),
Period(i64, PeriodFreq),
Interval(u64, u64, IntervalClosed),
}
let mut seen = FxHashSet::default();
for val in values {
if val.is_missing() {
continue;
}
let key = match val {
Scalar::Bool(v) => ScalarKey::Bool(*v),
Scalar::Int64(v) => ScalarKey::Int64(*v),
Scalar::Float64(v) => {
let normalized = if *v == 0.0 { 0.0 } else { *v };
ScalarKey::FloatBits(normalized.to_bits())
}
Scalar::Utf8(v) => ScalarKey::Utf8(v.as_str()),
Scalar::Timedelta64(v) => ScalarKey::Timedelta64(*v),
Scalar::Datetime64(v) => ScalarKey::Datetime64(*v),
Scalar::Period(p) => ScalarKey::Period(p.ordinal, p.freq),
Scalar::Interval(v) => ScalarKey::Interval(
normalized_float_bits(v.left),
normalized_float_bits(v.right),
v.closed,
),
Scalar::Null(_) => continue,
};
seen.insert(key);
}
Scalar::Int64(seen.len() as i64)
}
fn normalized_float_bits(value: f64) -> u64 {
let normalized = if value == 0.0 { 0.0 } else { value };
normalized.to_bits()
}
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default, Serialize, Deserialize,
)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum IntervalClosed {
Left,
#[default]
Right,
Both,
Neither,
}
impl IntervalClosed {
#[must_use]
pub fn left_closed(self) -> bool {
matches!(self, Self::Left | Self::Both)
}
#[must_use]
pub fn right_closed(self) -> bool {
matches!(self, Self::Right | Self::Both)
}
}
impl std::fmt::Display for IntervalClosed {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Left => write!(f, "left"),
Self::Right => write!(f, "right"),
Self::Both => write!(f, "both"),
Self::Neither => write!(f, "neither"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct Interval {
pub left: f64,
pub right: f64,
#[serde(default)]
pub closed: IntervalClosed,
}
impl Interval {
#[must_use]
pub const fn new(left: f64, right: f64, closed: IntervalClosed) -> Self {
Self {
left,
right,
closed,
}
}
#[must_use]
pub fn length(&self) -> f64 {
self.right - self.left
}
#[must_use]
pub fn mid(&self) -> f64 {
(self.left + self.right) / 2.0
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.left == self.right && !matches!(self.closed, IntervalClosed::Both)
}
#[must_use]
pub fn contains(&self, value: f64) -> bool {
if value.is_nan() {
return false;
}
let left_ok = if self.closed.left_closed() {
value >= self.left
} else {
value > self.left
};
let right_ok = if self.closed.right_closed() {
value <= self.right
} else {
value < self.right
};
left_ok && right_ok
}
#[must_use]
pub fn overlaps(&self, other: &Self) -> bool {
if self.left > other.right || other.left > self.right {
return false;
}
if self.right == other.left {
return self.closed.right_closed() && other.closed.left_closed();
}
if other.right == self.left {
return other.closed.right_closed() && self.closed.left_closed();
}
true
}
pub fn parse(s: &str) -> Result<Self, TypeError> {
let s = s.trim();
if s.len() < 5 {
return Err(TypeError::ValueNotParseable {
value: s.to_string(),
target: "Interval".to_string(),
});
}
let first_char = s.chars().next().unwrap();
let last_char = s.chars().last().unwrap();
let left_closed = match first_char {
'[' => true,
'(' => false,
_ => {
return Err(TypeError::ValueNotParseable {
value: s.to_string(),
target: "Interval".to_string(),
});
}
};
let right_closed = match last_char {
']' => true,
')' => false,
_ => {
return Err(TypeError::ValueNotParseable {
value: s.to_string(),
target: "Interval".to_string(),
});
}
};
let closed = match (left_closed, right_closed) {
(true, true) => IntervalClosed::Both,
(true, false) => IntervalClosed::Left,
(false, true) => IntervalClosed::Right,
(false, false) => IntervalClosed::Neither,
};
let inner = &s[1..s.len() - 1];
let parts: Vec<&str> = inner.split(',').collect();
if parts.len() != 2 {
return Err(TypeError::ValueNotParseable {
value: s.to_string(),
target: "Interval".to_string(),
});
}
let left: f64 = parts[0]
.trim()
.parse()
.map_err(|_| TypeError::ValueNotParseable {
value: s.to_string(),
target: "Interval".to_string(),
})?;
let right: f64 = parts[1]
.trim()
.parse()
.map_err(|_| TypeError::ValueNotParseable {
value: s.to_string(),
target: "Interval".to_string(),
})?;
Ok(Self::new(left, right, closed))
}
}
impl std::fmt::Display for Interval {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let left_bracket = if self.closed.left_closed() { '[' } else { '(' };
let right_bracket = if self.closed.right_closed() { ']' } else { ')' };
write!(
f,
"{left_bracket}{}, {}{right_bracket}",
float_to_string_for_astype(self.left),
float_to_string_for_astype(self.right)
)
}
}
#[must_use]
pub fn interval_range_by_periods(
start: f64,
end: f64,
periods: usize,
closed: IntervalClosed,
) -> Vec<Interval> {
if periods == 0 || !start.is_finite() || !end.is_finite() || start >= end {
return Vec::new();
}
let step = (end - start) / (periods as f64);
let mut out = Vec::with_capacity(periods);
for i in 0..periods {
let left = start + step * (i as f64);
let right = if i + 1 == periods {
end
} else {
start + step * ((i + 1) as f64)
};
out.push(Interval::new(left, right, closed));
}
out
}
pub fn interval_range_by_step(
start: f64,
end: f64,
step: f64,
closed: IntervalClosed,
) -> Result<Vec<Interval>, TypeError> {
if !step.is_finite() || !step.is_sign_positive() || step == 0.0 {
return Err(TypeError::InvalidIntervalStep { step });
}
if !start.is_finite() || !end.is_finite() || start >= end {
return Ok(Vec::new());
}
let span = end - start;
let periods_f = span / step;
let periods = periods_f.round() as i64;
if periods <= 0 {
return Ok(Vec::new());
}
let reconstructed = step * (periods as f64);
if (span - reconstructed).abs() > span.abs() * 1e-9 + 1e-12 {
return Err(TypeError::IntervalStepDoesNotDivide { step, span });
}
let periods = periods as usize;
let mut out = Vec::with_capacity(periods);
for i in 0..periods {
let left = start + step * (i as f64);
let right = if i + 1 == periods {
end
} else {
start + step * ((i + 1) as f64)
};
out.push(Interval::new(left, right, closed));
}
Ok(out)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING-KEBAB-CASE")]
#[non_exhaustive]
pub enum PeriodFreq {
Annual,
Quarterly,
Monthly,
Weekly,
Daily,
Business,
Hourly,
Minutely,
Secondly,
}
impl PeriodFreq {
pub fn parse(alias: &str) -> Option<Self> {
match alias.to_ascii_uppercase().as_str() {
"A" | "Y" | "A-DEC" | "Y-DEC" | "ANNUAL" | "YEARLY" => Some(Self::Annual),
"Q" | "Q-DEC" | "QUARTERLY" => Some(Self::Quarterly),
"M" | "MONTHLY" => Some(Self::Monthly),
"W" | "W-SUN" | "WEEKLY" => Some(Self::Weekly),
"D" | "DAILY" => Some(Self::Daily),
"B" | "BUSINESS" => Some(Self::Business),
"H" | "HOURLY" => Some(Self::Hourly),
"T" | "MIN" | "MINUTELY" => Some(Self::Minutely),
"S" | "SECONDLY" => Some(Self::Secondly),
_ => None,
}
}
#[must_use]
pub const fn alias(self) -> &'static str {
match self {
Self::Annual => "Y-DEC",
Self::Quarterly => "Q-DEC",
Self::Monthly => "M",
Self::Weekly => "W-SUN",
Self::Daily => "D",
Self::Business => "B",
Self::Hourly => "h",
Self::Minutely => "min",
Self::Secondly => "s",
}
}
#[must_use]
pub const fn resolution(self) -> &'static str {
match self {
Self::Annual => "A-DEC",
Self::Quarterly => "Q-DEC",
Self::Monthly => "M",
Self::Weekly => "W-SUN",
Self::Daily => "D",
Self::Business => "B",
Self::Hourly => "H",
Self::Minutely => "T",
Self::Secondly => "S",
}
}
}
impl std::fmt::Display for PeriodFreq {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.alias())
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Period {
pub ordinal: i64,
pub freq: PeriodFreq,
}
impl Period {
#[must_use]
pub const fn new(ordinal: i64, freq: PeriodFreq) -> Self {
Self { ordinal, freq }
}
#[must_use]
pub const fn ordinal(&self) -> i64 {
self.ordinal
}
#[must_use]
pub const fn freq(&self) -> PeriodFreq {
self.freq
}
#[must_use]
pub const fn freqstr(&self) -> &'static str {
self.freq.alias()
}
#[must_use]
pub fn cmp_same_freq(&self, other: &Self) -> Option<std::cmp::Ordering> {
if self.freq != other.freq {
return None;
}
Some(self.ordinal.cmp(&other.ordinal))
}
#[must_use]
pub fn shift(&self, n: i64) -> Self {
Self {
ordinal: self.ordinal.saturating_add(n),
freq: self.freq,
}
}
#[must_use]
pub fn diff(&self, other: &Self) -> Option<i64> {
if self.freq != other.freq {
return None;
}
Some(self.ordinal.saturating_sub(other.ordinal))
}
pub fn parse(s: &str) -> Result<Self, TypeError> {
let trimmed = s.trim();
if trimmed.eq_ignore_ascii_case("nat") {
return Ok(Self::new(i64::MIN, PeriodFreq::Daily));
}
if let Some((year, quarter)) = parse_quarter_period(trimmed) {
let ordinal = year
.checked_sub(1970)
.and_then(|offset| offset.checked_mul(4))
.and_then(|base| base.checked_add(i64::from(quarter) - 1))
.ok_or_else(|| TypeError::ValueNotParseable {
value: s.to_owned(),
target: "Period".to_owned(),
})?;
return Ok(Self::new(ordinal, PeriodFreq::Quarterly));
}
if let Some((year, month, day)) = parse_ymd_period(trimmed) {
let ordinal = Timestamp::days_from_ymd(year, i64::from(month), i64::from(day));
return Ok(Self::new(ordinal, PeriodFreq::Daily));
}
if let Some((year, month)) = parse_year_month_period(trimmed) {
let ordinal = year
.checked_sub(1970)
.and_then(|offset| offset.checked_mul(12))
.and_then(|base| base.checked_add(i64::from(month) - 1))
.ok_or_else(|| TypeError::ValueNotParseable {
value: s.to_owned(),
target: "Period".to_owned(),
})?;
return Ok(Self::new(ordinal, PeriodFreq::Monthly));
}
if let Some(year) = parse_annual_period(trimmed) {
let ordinal = year
.checked_sub(1970)
.ok_or_else(|| TypeError::ValueNotParseable {
value: s.to_owned(),
target: "Period".to_owned(),
})?;
return Ok(Self::new(ordinal, PeriodFreq::Annual));
}
Err(TypeError::ValueNotParseable {
value: s.to_owned(),
target: "Period".to_owned(),
})
}
#[must_use]
pub fn calendar_string(&self) -> String {
if self.ordinal == i64::MIN {
return "NaT".to_owned();
}
let ord = self.ordinal;
match self.freq {
PeriodFreq::Annual => {
let year = 1970 + ord;
format!("{year}")
}
PeriodFreq::Quarterly => {
let year = 1970 + ord.div_euclid(4);
let quarter = ord.rem_euclid(4) + 1;
format!("{year}Q{quarter}")
}
PeriodFreq::Monthly => {
let year = 1970 + ord.div_euclid(12);
let month = ord.rem_euclid(12) + 1;
format!("{year:04}-{month:02}")
}
PeriodFreq::Daily | PeriodFreq::Business | PeriodFreq::Weekly => {
let (y, m, d) = civil_from_days(ord);
format!("{y:04}-{m:02}-{d:02}")
}
PeriodFreq::Hourly => {
let (y, m, d) = civil_from_days(ord.div_euclid(24));
let hour = ord.rem_euclid(24);
format!("{y:04}-{m:02}-{d:02} {hour:02}:00")
}
PeriodFreq::Minutely => {
let day = ord.div_euclid(1440);
let mins = ord.rem_euclid(1440);
let (y, m, d) = civil_from_days(day);
format!("{y:04}-{m:02}-{d:02} {:02}:{:02}", mins / 60, mins % 60)
}
PeriodFreq::Secondly => {
let day = ord.div_euclid(86_400);
let secs = ord.rem_euclid(86_400);
let (y, m, d) = civil_from_days(day);
format!(
"{y:04}-{m:02}-{d:02} {:02}:{:02}:{:02}",
secs / 3600,
(secs % 3600) / 60,
secs % 60
)
}
}
}
}
fn civil_from_days(days_since_epoch: i64) -> (i64, u32, u32) {
let days = days_since_epoch + 719_468;
let era = if days >= 0 { days } else { days - 146_096 } / 146_097;
let doe = days - era * 146_097;
let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146_096) / 365;
let y = yoe + era * 400;
let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
let mp = (5 * doy + 2) / 153;
let d = doy - (153 * mp + 2) / 5 + 1;
let m = if mp < 10 { mp + 3 } else { mp - 9 };
let year = if m <= 2 { y + 1 } else { y };
(year, m as u32, d as u32)
}
fn parse_annual_period(value: &str) -> Option<i64> {
(value.len() == 4 && value.chars().all(|ch| ch.is_ascii_digit()))
.then(|| value.parse::<i64>().ok())
.flatten()
}
fn parse_year_month_period(value: &str) -> Option<(i64, u32)> {
let (year, month) = value.split_once('-')?;
if year.len() != 4 || month.len() != 2 {
return None;
}
let year = year.parse::<i64>().ok()?;
let month = month.parse::<u32>().ok()?;
(1..=12).contains(&month).then_some((year, month))
}
fn parse_ymd_period(value: &str) -> Option<(i64, u32, u32)> {
let mut parts = value.split('-');
let year = parts.next()?;
let month = parts.next()?;
let day = parts.next()?;
if parts.next().is_some() || year.len() != 4 || month.len() != 2 || day.len() != 2 {
return None;
}
let year = year.parse::<i64>().ok()?;
let month = month.parse::<u32>().ok()?;
let day = day.parse::<u32>().ok()?;
(1..=days_in_month(year, month)?)
.contains(&day)
.then_some((year, month, day))
}
fn parse_quarter_period(value: &str) -> Option<(i64, u32)> {
let (year, quarter) = value.split_once('Q').or_else(|| value.split_once('q'))?;
if year.len() != 4 || quarter.len() != 1 {
return None;
}
let year = year.parse::<i64>().ok()?;
let quarter = quarter.parse::<u32>().ok()?;
(1..=4).contains(&quarter).then_some((year, quarter))
}
impl std::fmt::Display for Period {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.calendar_string())
}
}
#[must_use]
pub fn period_range(start: Period, periods: usize) -> Vec<Period> {
(0..periods).map(|i| start.shift(i as i64)).collect()
}
#[cfg(test)]
mod tests {
use super::{
DType, Interval, IntervalClosed, NullKind, Period, PeriodFreq, Scalar, SparseDType,
cast_scalar, common_dtype, infer_dtype,
};
#[test]
fn semantic_cmp_cross_numeric_int_float_cdpai() {
use std::cmp::Ordering;
let mut st: u64 = 0xc205_a1b2_c3d4_e5f6;
let mut next = || {
st = st
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(st >> 33) as u32
};
for _ in 0..3000u32 {
let i = (next() % 21) as i64 - 10;
let f = (next() % 400) as f64 / 20.0 - 10.0; let exp = (i as f64).partial_cmp(&f).unwrap();
assert_eq!(Scalar::Int64(i).semantic_cmp(&Scalar::Float64(f)), exp);
assert_eq!(
Scalar::Float64(f).semantic_cmp(&Scalar::Int64(i)),
exp.reverse()
);
}
assert_eq!(
Scalar::Int64(5).semantic_cmp(&Scalar::Float64(5.0)),
Ordering::Equal
);
assert_eq!(
Scalar::Float64(5.0).semantic_cmp(&Scalar::Int64(5)),
Ordering::Equal
);
assert_eq!(
Scalar::Int64(3).semantic_cmp(&Scalar::Float64(3.5)),
Ordering::Less
);
assert_eq!(
Scalar::Int64(4).semantic_cmp(&Scalar::Float64(3.5)),
Ordering::Greater
);
}
#[test]
fn semantic_cmp_nat_degeneracy_temporal_767ak() {
use std::cmp::Ordering;
const NAT: i64 = i64::MIN;
let td_nat = Scalar::Timedelta64(NAT);
for v in [-3i64, 0, 5, 99] {
let td = Scalar::Timedelta64(v);
assert_eq!(td_nat.semantic_cmp(&td), Ordering::Equal, "td NAT vs {v}");
assert_eq!(td.semantic_cmp(&td_nat), Ordering::Equal, "td {v} vs NAT");
}
assert_eq!(td_nat.semantic_cmp(&td_nat), Ordering::Equal);
let dt_nat = Scalar::Datetime64(NAT);
for v in [-3i64, 0, 5, 99] {
let dt = Scalar::Datetime64(v);
assert_eq!(dt_nat.semantic_cmp(&dt), Ordering::Equal, "dt NAT vs {v}");
assert_eq!(dt.semantic_cmp(&dt_nat), Ordering::Equal, "dt {v} vs NAT");
}
assert_eq!(dt_nat.semantic_cmp(&dt_nat), Ordering::Equal);
for (a, b) in [(1i64, 2i64), (5, 5), (9, -1)] {
let (ta, tb) = (Scalar::Timedelta64(a), Scalar::Timedelta64(b));
assert_eq!(ta.semantic_cmp(&ta), Ordering::Equal);
assert_eq!(ta.semantic_cmp(&tb), b.cmp(&a).reverse()); assert_eq!(ta.semantic_cmp(&tb), a.cmp(&b));
let (da, db) = (Scalar::Datetime64(a), Scalar::Datetime64(b));
assert_eq!(da.semantic_cmp(&db), a.cmp(&b));
}
}
#[test]
fn semantic_cmp_total_order_axioms_ay8o9() {
use std::cmp::Ordering;
let mut state: u64 = 0xc0ff_eeba_df00_d123;
let mut next = || {
state = state
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(state >> 33) as u32
};
for iter in 0..6000u32 {
let dt = next() % 4;
let mk = |r: u32| match dt {
0 => Scalar::Int64((r % 11) as i64 - 5),
1 => Scalar::Float64(f64::from((r % 21) as i32 - 10) / 2.0),
2 => Scalar::Utf8(format!("s{}", r % 5)),
_ => Scalar::Bool(r.is_multiple_of(2)),
};
let a = mk(next());
let b = mk(next());
let c = mk(next());
let ctx = format!("iter={iter} a={a:?} b={b:?} c={c:?}");
assert_eq!(a.semantic_cmp(&a), Ordering::Equal, "reflexive {ctx}");
assert_eq!(
a.semantic_cmp(&b),
b.semantic_cmp(&a).reverse(),
"antisymmetric {ctx}"
);
let ab = a.semantic_cmp(&b);
assert_eq!(
a.semantic_le(&b),
ab != Ordering::Greater,
"le-consistent {ctx}"
);
assert_eq!(
a.semantic_ge(&b),
ab != Ordering::Less,
"ge-consistent {ctx}"
);
assert_eq!(
a.semantic_le(&b) && a.semantic_ge(&b),
ab == Ordering::Equal,
"le&ge<=>eq {ctx}"
);
if a.semantic_cmp(&b) != Ordering::Greater && b.semantic_cmp(&c) != Ordering::Greater {
assert_ne!(a.semantic_cmp(&c), Ordering::Greater, "transitivity {ctx}");
}
}
let nan = Scalar::Float64(f64::NAN);
for v in [
Scalar::Float64(-3.5),
Scalar::Float64(0.0),
Scalar::Float64(7.25),
] {
assert_eq!(nan.semantic_cmp(&v), Ordering::Equal, "NaN cmp finite");
assert_eq!(v.semantic_cmp(&nan), Ordering::Equal, "finite cmp NaN");
}
assert_eq!(nan.semantic_cmp(&nan), Ordering::Equal, "NaN cmp NaN");
}
#[test]
fn common_dtype_lattice_axioms_be314() {
const ALL: [DType; 13] = [
DType::Null,
DType::Bool,
DType::BoolNullable,
DType::Int64,
DType::Int64Nullable,
DType::Float64,
DType::Utf8,
DType::Categorical,
DType::Timedelta64,
DType::Datetime64,
DType::Period,
DType::Interval,
DType::Sparse,
];
for &a in &ALL {
assert_eq!(common_dtype(a, a), Ok(a), "idempotent {a:?}");
assert_eq!(
common_dtype(DType::Null, a),
Ok(a),
"null-left identity {a:?}"
);
assert_eq!(
common_dtype(a, DType::Null),
Ok(a),
"null-right identity {a:?}"
);
for &b in &ALL {
assert_eq!(
common_dtype(a, b).ok(),
common_dtype(b, a).ok(),
"commutative value {a:?},{b:?}"
);
assert_eq!(
common_dtype(a, b).is_ok(),
common_dtype(b, a).is_ok(),
"commutative ok-ness {a:?},{b:?}"
);
}
}
for &a in &ALL {
for &b in &ALL {
for &c in &ALL {
if let (Ok(ab), Ok(bc)) = (common_dtype(a, b), common_dtype(b, c))
&& let (Ok(left), Ok(right)) = (common_dtype(ab, c), common_dtype(a, bc))
{
assert_eq!(left, right, "associative {a:?},{b:?},{c:?}");
}
}
}
}
}
#[test]
fn infer_dtype_coercion_rules_e3sfq() {
use DType::{Bool, Float64, Int64, Null, Utf8};
assert_eq!(infer_dtype(&[]), Ok(Null));
assert_eq!(
infer_dtype(&[Scalar::Null(NullKind::Null), Scalar::Null(NullKind::NaN)]),
Ok(Null)
);
let mut s: u64 = 0x132d_a7e0_0e3f_c0de;
let mut next = || {
s = s
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(s >> 33) as u32
};
for _ in 0..600u32 {
let n = (next() % 6) as usize + 1;
let ints: Vec<Scalar> = (0..n)
.map(|_| Scalar::Int64((next() % 9) as i64 - 4))
.collect();
assert_eq!(infer_dtype(&ints), Ok(Int64));
let floats: Vec<Scalar> = (0..n)
.map(|_| Scalar::Float64(f64::from((next() % 7) as i32)))
.collect();
assert_eq!(infer_dtype(&floats), Ok(Float64));
let bools: Vec<Scalar> = (0..n).map(|_| Scalar::Bool(next() % 2 == 0)).collect();
assert_eq!(infer_dtype(&bools), Ok(Bool));
let strs: Vec<Scalar> = (0..n)
.map(|_| Scalar::Utf8(format!("s{}", next() % 4)))
.collect();
assert_eq!(infer_dtype(&strs), Ok(Utf8));
}
assert_eq!(
infer_dtype(&[
Scalar::Int64(1),
Scalar::Null(NullKind::Null),
Scalar::Int64(2)
]),
Ok(Int64),
"Int64 + nulls -> Int64"
);
assert_eq!(
infer_dtype(&[Scalar::Int64(1), Scalar::Float64(2.5)]),
Ok(Float64),
"Int64 + Float64 -> Float64"
);
assert_eq!(
infer_dtype(&[Scalar::Bool(true), Scalar::Int64(3)]),
Ok(Int64),
"Bool + Int64 -> Int64"
);
assert_eq!(
infer_dtype(&[Scalar::Utf8("a".into()), Scalar::Int64(3)]),
Ok(Utf8),
"Utf8 + Int64 -> Utf8 (object fallback)"
);
}
#[test]
fn missing_for_dtype_always_missing_1ews0() {
const ALL: [DType; 13] = [
DType::Null,
DType::Bool,
DType::BoolNullable,
DType::Int64,
DType::Int64Nullable,
DType::Float64,
DType::Utf8,
DType::Categorical,
DType::Timedelta64,
DType::Datetime64,
DType::Period,
DType::Interval,
DType::Sparse,
];
for &dt in &ALL {
let m = Scalar::missing_for_dtype(dt);
assert!(m.is_missing(), "missing_for_dtype({dt:?}) must be missing");
for &target in &ALL {
let cast = cast_scalar(&m, target).expect("cast of missing");
if target == DType::Utf8 {
assert!(
matches!(cast, Scalar::Utf8(_)),
"cast(missing {dt:?} -> Utf8) yields a string, got {cast:?}"
);
} else {
assert!(
cast.is_missing(),
"cast(missing {dt:?} -> {target:?}) must stay missing, got {cast:?}"
);
}
}
}
}
#[test]
fn cast_scalar_bool_int_roundtrip_6w07b() {
use super::cast_scalar;
assert_eq!(
cast_scalar(&Scalar::Bool(true), DType::Int64).unwrap(),
Scalar::Int64(1)
);
assert_eq!(
cast_scalar(&Scalar::Bool(false), DType::Int64).unwrap(),
Scalar::Int64(0)
);
assert_eq!(
cast_scalar(&Scalar::Int64(0), DType::Bool).unwrap(),
Scalar::Bool(false)
);
let mut st: u64 = 0x4b07_0b1c_2d3e_4f50;
let mut next = || {
st = st
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(st >> 33) as u32
};
for _ in 0..2000u32 {
let v = (next() % 21) as i64 - 10;
assert_eq!(
cast_scalar(&Scalar::Int64(v), DType::Bool).unwrap(),
Scalar::Bool(v != 0),
"int->bool v={v}"
);
}
}
#[test]
fn cast_scalar_float_to_int_truncates_toward_zero_u9lec() {
use super::cast_scalar;
let mut st: u64 = 0x4ca5_0b1c_2d3e_4f50;
let mut next = || {
st = st
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(st >> 33) as u32
};
for (f, exp) in [
(-3.7, -3i64),
(3.7, 3),
(-3.2, -3),
(3.2, 3),
(-0.9, 0),
(0.9, 0),
(-5.0, -5),
(5.0, 5),
] {
assert_eq!(
cast_scalar(&Scalar::Float64(f), DType::Int64).unwrap(),
Scalar::Int64(exp),
"cast {f}"
);
}
for _ in 0..3000u32 {
let v = (next() % 2_000_001) as f64 / 1000.0 - 1000.0; let got = cast_scalar(&Scalar::Float64(v), DType::Int64).unwrap();
assert_eq!(
got,
Scalar::Int64(v.trunc() as i64),
"trunc-toward-zero v={v}"
);
}
}
#[test]
fn nancount_nunique_prod_any_all_mx60x() {
use super::{nanall, nanany, nancount, nannunique, nanprod};
let mut s: u64 = 0x4e2a_0b1c_2d3e_4f50;
let mut next = || {
s = s
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(s >> 33) as u32
};
let asf = |sc: Scalar| -> f64 { sc.to_f64().unwrap_or(f64::NAN) };
let asb = |sc: Scalar| -> bool { matches!(sc, Scalar::Bool(true)) };
for iter in 0..1000u32 {
let n = (next() % 10) as usize + 1;
let raw: Vec<f64> = (0..n)
.map(|_| {
if next() % 4 == 0 {
f64::NAN
} else {
(next() % 5) as f64
}
})
.collect();
let finite: Vec<f64> = raw.iter().copied().filter(|x| !x.is_nan()).collect();
if finite.is_empty() {
continue;
}
let scalars: Vec<Scalar> = raw.iter().map(|&x| Scalar::Float64(x)).collect();
let distinct: std::collections::HashSet<u64> =
finite.iter().map(|x| x.to_bits()).collect();
let prod: f64 = finite.iter().product();
assert!(
(asf(nancount(&scalars)) - finite.len() as f64).abs() < 1e-9,
"nancount iter={iter}"
);
assert!(
(asf(nannunique(&scalars)) - distinct.len() as f64).abs() < 1e-9,
"nannunique iter={iter}"
);
assert!(
(asf(nanprod(&scalars)) - prod).abs() < 1e-7,
"nanprod iter={iter}"
);
assert_eq!(
asb(nanany(&scalars)),
finite.iter().any(|&x| x != 0.0),
"nanany iter={iter}"
);
assert_eq!(
asb(nanall(&scalars)),
finite.iter().all(|&x| x != 0.0),
"nanall iter={iter}"
);
}
}
#[test]
fn nan_reduction_kernels_skip_correctness_1uagc() {
use super::{nanmax, nanmedian, nanmin, nansum};
let mut s: u64 = 0x4e1a_0b2c_2d3e_4f50;
let mut next = || {
s = s
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(s >> 33) as u32
};
let val = |sc: Scalar| -> f64 { sc.to_f64().unwrap_or(f64::NAN) };
for iter in 0..1000u32 {
let n = (next() % 12) as usize + 1;
let raw: Vec<f64> = (0..n)
.map(|_| {
if next() % 4 == 0 {
f64::NAN
} else {
(next() % 200) as f64 - 100.0
}
})
.collect();
let mut finite: Vec<f64> = raw.iter().copied().filter(|x| !x.is_nan()).collect();
if finite.is_empty() {
continue;
}
let scalars: Vec<Scalar> = raw.iter().map(|&x| Scalar::Float64(x)).collect();
let sum: f64 = finite.iter().sum();
let mn = finite.iter().copied().fold(f64::INFINITY, f64::min);
let mx = finite.iter().copied().fold(f64::NEG_INFINITY, f64::max);
finite.sort_by(|a, b| a.partial_cmp(b).unwrap());
let m = finite.len();
let med = if m % 2 == 1 {
finite[m / 2]
} else {
(finite[m / 2 - 1] + finite[m / 2]) / 2.0
};
assert!(
(val(nansum(&scalars)) - sum).abs() < 1e-7,
"nansum iter={iter}"
);
assert!(
(val(nanmin(&scalars)) - mn).abs() < 1e-9,
"nanmin iter={iter}"
);
assert!(
(val(nanmax(&scalars)) - mx).abs() < 1e-9,
"nanmax iter={iter}"
);
assert!(
(val(nanmedian(&scalars)) - med).abs() < 1e-9,
"nanmedian iter={iter}"
);
}
}
#[test]
fn nanvar_ddof_nanstd_nan_skip_p00ag() {
use super::{nanmean, nanstd, nanvar};
let mut s: u64 = 0x4e0a_0b1c_2d3e_4f50;
let mut next = || {
s = s
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(s >> 33) as u32
};
let val = |sc: Scalar| -> f64 { sc.to_f64().unwrap_or(f64::NAN) };
for iter in 0..1000u32 {
let n = (next() % 10) as usize + 2;
let raw: Vec<f64> = (0..n)
.map(|_| {
if next() % 4 == 0 {
f64::NAN
} else {
(next() % 200) as f64 / 7.0
}
})
.collect();
let finite: Vec<f64> = raw.iter().copied().filter(|x| !x.is_nan()).collect();
if finite.len() < 2 {
continue;
}
let scalars: Vec<Scalar> = raw.iter().map(|&x| Scalar::Float64(x)).collect();
let nf = finite.len() as f64;
let mean = finite.iter().sum::<f64>() / nf;
let ss = finite.iter().map(|x| (x - mean).powi(2)).sum::<f64>();
assert!(
(val(nanmean(&scalars)) - mean).abs() < 1e-7,
"nanmean iter={iter}"
);
assert!(
(val(nanvar(&scalars, 0)) - ss / nf).abs() < 1e-7,
"nanvar ddof0 iter={iter}"
);
assert!(
(val(nanvar(&scalars, 1)) - ss / (nf - 1.0)).abs() < 1e-7,
"nanvar ddof1 iter={iter}"
);
assert!(
(val(nanstd(&scalars, 1)) - (ss / (nf - 1.0)).sqrt()).abs() < 1e-7,
"nanstd ddof1 iter={iter}"
);
}
}
#[test]
fn nanskew_nankurt_min_sample_and_known_xybnq() {
use super::{nankurt, nanskew};
let f = |xs: &[f64]| -> Vec<Scalar> { xs.iter().map(|&x| Scalar::Float64(x)).collect() };
let val = |s: Scalar| -> Option<f64> {
if s.is_missing() {
None
} else {
s.to_f64().ok()
}
};
assert_eq!(val(nanskew(&f(&[1.0, 2.0]))), None, "skew n=2 -> NaN");
let sym = val(nanskew(&f(&[1.0, 2.0, 3.0]))).expect("skew n=3");
assert!(sym.abs() < 1e-9, "symmetric skew ~0, got {sym}");
let right = val(nanskew(&f(&[1.0, 1.0, 1.0, 5.0]))).expect("skew n=4");
assert!(right > 0.0, "right-skewed -> positive skew, got {right}");
assert_eq!(val(nankurt(&f(&[1.0, 2.0, 3.0]))), None, "kurt n=3 -> NaN");
let k = val(nankurt(&f(&[1.0, 2.0, 3.0, 4.0, 5.0]))).expect("kurt n=5");
assert!(
(k - (-1.2)).abs() < 1e-6,
"pandas kurt([1..5]) == -1.2, got {k}"
);
}
#[test]
fn cast_scalar_to_utf8_formatting_yes7i() {
let mut st: u64 = 0x4e57_0b1c_2d3e_4f50;
let mut next = || {
st = st
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(st >> 33) as u32
};
for _ in 0..3000u32 {
let n = (next() % 4_000_001) as i64 - 2_000_000;
assert_eq!(
cast_scalar(&Scalar::Int64(n), DType::Utf8).unwrap(),
Scalar::Utf8(n.to_string())
);
}
assert_eq!(
cast_scalar(&Scalar::Bool(true), DType::Utf8).unwrap(),
Scalar::Utf8("True".to_string())
);
assert_eq!(
cast_scalar(&Scalar::Bool(false), DType::Utf8).unwrap(),
Scalar::Utf8("False".to_string())
);
}
#[test]
fn cast_scalar_coercion_rules_6a83t() {
let mut state: u64 = 0x5a17_c0de_1234_abcd;
let mut next = || {
state = state
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(state >> 33) as u32
};
for _ in 0..4000u32 {
let n = (next() % 21) as i64 - 10; let b = next() % 2 == 0;
let f = f64::from((next() % 41) as i32 - 20) / 4.0;
let i = Scalar::Int64(n);
let bo = Scalar::Bool(b);
let fl = Scalar::Float64(f);
assert_eq!(cast_scalar(&i, DType::Int64), Ok(i.clone()));
assert_eq!(cast_scalar(&bo, DType::Bool), Ok(bo.clone()));
assert_eq!(cast_scalar(&fl, DType::Float64), Ok(fl.clone()));
assert_eq!(cast_scalar(&i, DType::Int64Nullable), Ok(i.clone()));
assert_eq!(cast_scalar(&bo, DType::BoolNullable), Ok(bo.clone()));
assert_eq!(cast_scalar(&i, DType::Bool), Ok(Scalar::Bool(n != 0)));
assert_eq!(
cast_scalar(&i, DType::Float64),
Ok(Scalar::Float64(n as f64))
);
assert_eq!(
cast_scalar(&bo, DType::Int64),
Ok(Scalar::Int64(i64::from(b)))
);
assert_eq!(
cast_scalar(&bo, DType::Float64),
Ok(Scalar::Float64(if b { 1.0 } else { 0.0 }))
);
assert_eq!(cast_scalar(&fl, DType::Int64), Ok(Scalar::Int64(f as i64)));
}
}
#[test]
fn scalar_from_primitive_types() {
assert_eq!(Scalar::from(true), Scalar::Bool(true));
assert_eq!(Scalar::from(42i64), Scalar::Int64(42));
assert_eq!(Scalar::from(1.5f64), Scalar::Float64(1.5));
assert_eq!(Scalar::from("hi"), Scalar::Utf8("hi".to_owned()));
assert_eq!(
Scalar::from(String::from("world")),
Scalar::Utf8("world".to_owned())
);
let mixed: Vec<Scalar> = vec![1i64.into(), 2.0f64.into(), "three".into()];
assert_eq!(mixed.len(), 3);
assert_eq!(mixed[0], Scalar::Int64(1));
assert_eq!(mixed[1], Scalar::Float64(2.0));
assert_eq!(mixed[2], Scalar::Utf8("three".to_owned()));
}
#[test]
fn dtype_inference_coerces_numeric_values() {
let values = vec![Scalar::Bool(true), Scalar::Int64(7), Scalar::Float64(3.5)];
assert_eq!(
infer_dtype(&values).expect("dtype should infer"),
DType::Float64
);
}
#[test]
fn interval_scalar_has_dtype_storage_and_unique_semantics_5g5uj() {
let left = Scalar::Interval(Interval::new(0.0, 1.0, IntervalClosed::Right));
let right = Scalar::Interval(Interval::new(1.0, 2.0, IntervalClosed::Right));
assert_eq!(left.dtype(), DType::Interval);
assert!(!left.is_missing());
assert_eq!(
infer_dtype(&[left.clone(), right.clone()]).expect("interval dtype"),
DType::Interval
);
assert_eq!(
common_dtype(DType::Interval, DType::Interval).expect("same interval dtype"),
DType::Interval
);
assert_eq!(
cast_scalar(&Scalar::Null(NullKind::Null), DType::Interval).expect("missing casts"),
Scalar::Null(NullKind::Null)
);
assert_eq!(
cast_scalar(&left, DType::Utf8).expect("interval string cast"),
Scalar::Utf8("(0.0, 1.0]".to_owned())
);
assert_eq!(
super::nannunique(&[left.clone(), right, left, Scalar::Null(NullKind::Null)]),
Scalar::Int64(2)
);
}
#[test]
fn cast_scalar_parses_temporal_extension_strings_avm08() {
let expected_nanos = super::Timestamp::parse("2024-01-15T10:30:45")
.expect("timestamp parse")
.nanos;
assert_eq!(
cast_scalar(
&Scalar::Utf8("2024-01-15T10:30:45".to_owned()),
DType::Datetime64
)
.expect("datetime cast"),
Scalar::Datetime64(expected_nanos)
);
assert_eq!(
cast_scalar(&Scalar::Utf8("2024Q1".to_owned()), DType::Period).expect("period cast"),
Scalar::Period(Period::new(216, PeriodFreq::Quarterly))
);
assert_eq!(
cast_scalar(&Scalar::Utf8("(0, 1]".to_owned()), DType::Interval)
.expect("interval cast"),
Scalar::Interval(Interval::new(0.0, 1.0, IntervalClosed::Right))
);
}
#[test]
fn missing_values_get_target_missing_marker() {
let missing = Scalar::Null(NullKind::Null);
let cast = cast_scalar(&missing, DType::Float64).expect("missing casts");
assert_eq!(cast, Scalar::Null(NullKind::NaN));
}
#[test]
fn cast_scalar_to_utf8_uses_pandas_string_spellings() {
let cases = [
(Scalar::Bool(true), "True"),
(Scalar::Bool(false), "False"),
(Scalar::Int64(-7), "-7"),
(Scalar::Float64(1.0), "1.0"),
(Scalar::Float64(1.5), "1.5"),
(Scalar::Float64(f64::NAN), "nan"),
(Scalar::Null(NullKind::Null), "None"),
(Scalar::Null(NullKind::NaN), "nan"),
(Scalar::Null(NullKind::NaT), "NaT"),
];
for (value, expected) in cases {
assert_eq!(
cast_scalar(&value, DType::Utf8).expect("cast"),
Scalar::Utf8(expected.to_owned())
);
}
}
#[test]
fn semantic_eq_treats_nan_as_equal() {
let left = Scalar::Float64(f64::NAN);
let right = Scalar::Null(NullKind::NaN);
assert!(left.semantic_eq(&right));
}
#[test]
fn semantic_eq_treats_nan_as_missing_null() {
let left = Scalar::Float64(f64::NAN);
let right = Scalar::Null(NullKind::Null);
assert!(left.semantic_eq(&right));
}
#[test]
fn common_dtype_rejects_string_numeric_mix() {
let err = common_dtype(DType::Utf8, DType::Int64).expect_err("must fail");
assert_eq!(
err.to_string(),
"dtype coercion from Utf8 to Int64 has no compatible common type"
);
let err = common_dtype(DType::Float64, DType::Utf8).expect_err("must fail");
assert_eq!(
err.to_string(),
"dtype coercion from Float64 to Utf8 has no compatible common type"
);
}
#[test]
fn sparse_dtype_normalizes_fill_value_to_value_dtype() {
let dtype = SparseDType::new(DType::Float64, Scalar::Int64(0)).expect("fill should cast");
assert_eq!(dtype.value_dtype, DType::Float64);
assert_eq!(dtype.fill_value, Scalar::Float64(0.0));
}
#[test]
fn sparse_dtype_rejects_sparse_value_dtype() {
let err = SparseDType::new(DType::Sparse, Scalar::Int64(0)).expect_err("must reject");
assert_eq!(err.to_string(), "sparse value dtype cannot be Sparse");
}
#[test]
fn common_dtype_rejects_sparse_dense_mix() {
let err = common_dtype(DType::Sparse, DType::Int64).expect_err("must fail");
assert_eq!(
err.to_string(),
"dtype coercion from Sparse to Int64 has no compatible common type"
);
}
#[test]
fn nullable_int64_promotion_matrix() {
assert_eq!(
common_dtype(DType::Int64, DType::Int64Nullable).unwrap(),
DType::Int64Nullable
);
assert_eq!(
common_dtype(DType::Int64Nullable, DType::Int64).unwrap(),
DType::Int64Nullable
);
assert_eq!(
common_dtype(DType::Int64Nullable, DType::Float64).unwrap(),
DType::Float64
);
assert_eq!(
common_dtype(DType::Float64, DType::Int64Nullable).unwrap(),
DType::Float64
);
assert_eq!(
common_dtype(DType::Int64Nullable, DType::Int64Nullable).unwrap(),
DType::Int64Nullable
);
assert_eq!(
common_dtype(DType::Bool, DType::Int64Nullable).unwrap(),
DType::Int64Nullable
);
assert_eq!(
common_dtype(DType::BoolNullable, DType::Int64).unwrap(),
DType::Int64Nullable
);
}
#[test]
fn nullable_bool_promotion_matrix() {
assert_eq!(
common_dtype(DType::Bool, DType::BoolNullable).unwrap(),
DType::BoolNullable
);
assert_eq!(
common_dtype(DType::BoolNullable, DType::Bool).unwrap(),
DType::BoolNullable
);
assert_eq!(
common_dtype(DType::BoolNullable, DType::Float64).unwrap(),
DType::Float64
);
}
#[test]
fn dtype_is_nullable_helper() {
assert!(DType::Int64Nullable.is_nullable());
assert!(DType::BoolNullable.is_nullable());
assert!(!DType::Int64.is_nullable());
assert!(!DType::Bool.is_nullable());
assert!(!DType::Float64.is_nullable());
assert!(!DType::Utf8.is_nullable());
}
#[test]
fn dtype_to_nullable_conversions() {
assert_eq!(DType::Int64.to_nullable(), DType::Int64Nullable);
assert_eq!(DType::Bool.to_nullable(), DType::BoolNullable);
assert_eq!(DType::Float64.to_nullable(), DType::Float64); assert_eq!(DType::Int64Nullable.to_nullable(), DType::Int64Nullable);
}
#[test]
fn dtype_to_non_nullable_conversions() {
assert_eq!(DType::Int64Nullable.to_non_nullable(), DType::Int64);
assert_eq!(DType::BoolNullable.to_non_nullable(), DType::Bool);
assert_eq!(DType::Int64.to_non_nullable(), DType::Int64); assert_eq!(DType::Float64.to_non_nullable(), DType::Float64);
}
#[test]
fn nullable_dtype_name_reports_pandas_style() {
assert_eq!(DType::Int64.name(), "int64");
assert_eq!(DType::Int64Nullable.name(), "Int64"); assert_eq!(DType::Bool.name(), "bool");
assert_eq!(DType::BoolNullable.name(), "boolean");
}
#[test]
fn cast_scalar_int64_nullable_identity() {
let val = Scalar::Int64(42);
let result = cast_scalar(&val, DType::Int64Nullable).unwrap();
assert_eq!(result, Scalar::Int64(42));
let result2 = cast_scalar(&val, DType::Int64).unwrap();
assert_eq!(result2, Scalar::Int64(42));
}
#[test]
fn cast_float_to_utf8_uses_pandas_str_float_with_scientific() {
let cases: &[(f64, &str)] = &[
(1.0, "1.0"),
(2.5, "2.5"),
(100.0, "100.0"),
(0.1, "0.1"),
(0.0001, "0.0001"),
(1e16, "1e+16"),
(1e20, "1e+20"),
(1e-5, "1e-05"),
(1e-7, "1e-07"),
(f64::INFINITY, "inf"),
(f64::NEG_INFINITY, "-inf"),
];
for (v, expected) in cases {
assert_eq!(
cast_scalar(&Scalar::Float64(*v), DType::Utf8).unwrap(),
Scalar::Utf8((*expected).to_owned()),
"float {v} -> str"
);
}
}
#[test]
fn cast_float_to_utf8_threshold_boundaries_match_python() {
let cases: &[(f64, &str)] = &[
(1e15, "1000000000000000.0"),
(9_999_999_999_999_998.0, "9999999999999998.0"),
(1_234_567_890_123_456.0, "1234567890123456.0"),
(123_456_789_012_345.0, "123456789012345.0"),
(12_345_678_901_234_567.0, "1.2345678901234568e+16"),
(1e16, "1e+16"),
(1.5e16, "1.5e+16"),
(1e17, "1e+17"),
(1e-4, "0.0001"),
(5e-5, "5e-05"),
(-1e15, "-1000000000000000.0"),
(-1e16, "-1e+16"),
(-1e-5, "-1e-05"),
];
for (v, expected) in cases {
assert_eq!(
cast_scalar(&Scalar::Float64(*v), DType::Utf8).unwrap(),
Scalar::Utf8((*expected).to_owned()),
"float {v} -> str"
);
}
}
#[test]
fn cast_to_bool_uses_pandas_nonzero_truthiness() {
let cases_int: &[(i64, bool)] = &[(0, false), (1, true), (-3, true), (2, true)];
for (v, expected) in cases_int {
assert_eq!(
cast_scalar(&Scalar::Int64(*v), DType::Bool).unwrap(),
Scalar::Bool(*expected),
"int {v} -> bool"
);
}
let cases_float: &[(f64, bool)] = &[
(0.0, false),
(-0.0, false),
(0.1, true),
(2.5, true),
(1.0, true),
(f64::NAN, true),
];
for (v, expected) in cases_float {
assert_eq!(
cast_scalar(&Scalar::Float64(*v), DType::Bool).unwrap(),
Scalar::Bool(*expected),
"float {v} -> bool"
);
}
}
#[test]
fn nullable_dtype_is_extension() {
assert!(DType::Int64Nullable.is_extension());
assert!(DType::BoolNullable.is_extension());
assert!(!DType::Int64.is_extension());
assert!(!DType::Bool.is_extension());
}
#[test]
fn infer_dtype_preserves_string_numeric_mix_as_utf8_bucket() {
let values = vec![Scalar::Utf8("x".into()), Scalar::Int64(7)];
assert_eq!(
infer_dtype(&values).expect("dtype should infer"),
DType::Utf8
);
}
#[test]
fn is_null_detects_explicit_nulls() {
assert!(Scalar::Null(NullKind::Null).is_null());
assert!(Scalar::Null(NullKind::NaN).is_null());
assert!(!Scalar::Int64(42).is_null());
assert!(!Scalar::Float64(f64::NAN).is_null());
}
#[test]
fn is_na_matches_is_missing() {
let vals = vec![
Scalar::Null(NullKind::Null),
Scalar::Float64(f64::NAN),
Scalar::Int64(0),
Scalar::Bool(false),
];
for v in &vals {
assert_eq!(v.is_na(), v.is_missing());
}
}
#[test]
fn coalesce_picks_first_non_missing() {
let null = Scalar::Null(NullKind::Null);
let fill = Scalar::Int64(99);
assert_eq!(null.coalesce(&fill), fill);
assert_eq!(fill.coalesce(&null), fill);
}
#[test]
fn isna_notna_complement() {
let vals = vec![
Scalar::Int64(1),
Scalar::Null(NullKind::Null),
Scalar::Float64(f64::NAN),
Scalar::Float64(3.0),
];
let na = super::isna(&vals);
let not = super::notna(&vals);
assert_eq!(na, vec![false, true, true, false]);
for (a, b) in na.iter().zip(not.iter()) {
assert_ne!(a, b);
}
}
#[test]
fn count_na_counts_missing() {
let vals = vec![
Scalar::Int64(1),
Scalar::Null(NullKind::Null),
Scalar::Float64(f64::NAN),
];
assert_eq!(super::count_na(&vals), 2);
}
#[test]
fn fill_na_replaces_missing() {
let vals = vec![
Scalar::Int64(1),
Scalar::Null(NullKind::Null),
Scalar::Float64(f64::NAN),
Scalar::Int64(4),
];
let filled = super::fill_na(&vals, &Scalar::Int64(0));
assert_eq!(filled[0], Scalar::Int64(1));
assert_eq!(filled[1], Scalar::Int64(0));
assert_eq!(filled[2], Scalar::Int64(0));
assert_eq!(filled[3], Scalar::Int64(4));
}
#[test]
fn dropna_removes_missing() {
let vals = vec![
Scalar::Int64(1),
Scalar::Null(NullKind::Null),
Scalar::Int64(3),
Scalar::Float64(f64::NAN),
];
let kept = super::dropna(&vals);
assert_eq!(kept.len(), 2);
assert_eq!(kept[0], Scalar::Int64(1));
assert_eq!(kept[1], Scalar::Int64(3));
}
#[test]
fn null_helpers_match_scalar_oracle_imt0c() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(3202034522624059733)
.wrapping_add(4354685564936845319);
*seed
}
fn assert_null_helpers(case: usize, values: &[Scalar], fill: &Scalar) {
let expected_missing = values.iter().filter(|value| value.is_missing()).count();
let expected_dropped = values
.iter()
.filter(|value| !value.is_missing())
.cloned()
.collect::<Vec<_>>();
let expected_filled = values
.iter()
.map(|value| {
if value.is_missing() {
fill.clone()
} else {
value.clone()
}
})
.collect::<Vec<_>>();
assert_eq!(
super::count_na(values),
expected_missing,
"case={case}: count_na mismatch for {values:?}"
);
let dropped = super::dropna(values);
assert_eq!(
dropped.len(),
expected_dropped.len(),
"case={case}: dropna length mismatch for {values:?}"
);
for (pos, (actual, expected)) in dropped.iter().zip(expected_dropped.iter()).enumerate()
{
assert!(
actual.semantic_eq(expected),
"case={case} pos={pos}: dropna expected {expected:?}, got {actual:?}"
);
}
let filled = super::fill_na(values, fill);
assert_eq!(
filled.len(),
expected_filled.len(),
"case={case}: fill_na length mismatch for {values:?}"
);
for (pos, (actual, expected)) in filled.iter().zip(expected_filled.iter()).enumerate() {
assert!(
actual.semantic_eq(expected),
"case={case} pos={pos}: fill_na expected {expected:?}, got {actual:?}"
);
}
}
let all_missing = [
Scalar::Null(NullKind::Null),
Scalar::Null(NullKind::NaN),
Scalar::Float64(f64::NAN),
Scalar::Timedelta64(i64::MIN),
];
assert_null_helpers(usize::MAX, &all_missing, &Scalar::Utf8("filled".into()));
let mut seed = 0xc011_a7ed_0b5e_1a55_u64;
for case in 0..260 {
let len = (next(&mut seed) % 83 + 1) as usize;
let mut values = Vec::with_capacity(len);
for pos in 0..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
values.push(match next(&mut seed) % 11 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Timedelta64(i64::MIN),
4 => Scalar::Bool(raw & 1 == 0),
5 => Scalar::Int64(raw),
6 => Scalar::Float64(raw as f64 / 37.0),
7 => Scalar::Float64(if raw & 1 == 0 { 0.0 } else { -0.0 }),
8 => Scalar::Utf8(format!("null_helper_{case}_{pos}")),
9 => Scalar::Utf8(String::new()),
_ => Scalar::Timedelta64(raw),
});
}
let fill = match case % 5 {
0 => Scalar::Bool(true),
1 => Scalar::Int64(-777),
2 => Scalar::Float64(12.5),
3 => Scalar::Utf8("filled".into()),
_ => Scalar::Timedelta64(123_456),
};
assert_null_helpers(case, &values, &fill);
}
}
#[test]
fn nansum_skips_nulls() {
let vals = vec![
Scalar::Float64(1.0),
Scalar::Null(NullKind::Null),
Scalar::Float64(2.0),
Scalar::Float64(f64::NAN),
Scalar::Int64(7),
];
assert_eq!(super::nansum(&vals), Scalar::Float64(10.0));
}
#[test]
fn nansum_empty_returns_zero() {
assert_eq!(super::nansum(&[]), Scalar::Float64(0.0));
}
#[test]
fn nansum_nanmean_match_numeric_and_timedelta_oracle_1xmi7() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
*seed
}
fn expected_numeric(values: &[Scalar]) -> (Scalar, Scalar) {
let mut sum = 0.0;
let mut count = 0usize;
for value in values {
if value.is_missing() {
continue;
}
if let Ok(value) = value.to_f64() {
sum += value;
count += 1;
}
}
let mean = if count == 0 {
Scalar::Null(NullKind::NaN)
} else {
Scalar::Float64(sum / count as f64)
};
(Scalar::Float64(sum), mean)
}
fn expected_timedelta(values: &[Scalar]) -> (Scalar, Scalar) {
let mut sum = 0_i128;
let mut count = 0_i128;
for value in values {
if let Scalar::Timedelta64(ns) = value
&& !value.is_missing()
{
sum += i128::from(*ns);
count += 1;
}
}
if count == 0 {
return (Scalar::Float64(0.0), Scalar::Null(NullKind::NaN));
}
let sum = sum.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
let mean = (sum / count).clamp(i128::from(i64::MIN), i128::from(i64::MAX));
(
Scalar::Timedelta64(sum as i64),
Scalar::Timedelta64(mean as i64),
)
}
fn assert_sum_mean(
case: usize,
family: &str,
values: &[Scalar],
expected_sum: Scalar,
expected_mean: Scalar,
) {
let actual_sum = super::nansum(values);
let actual_mean = super::nanmean(values);
assert!(
actual_sum.semantic_eq(&expected_sum),
"case={case} family={family}: expected sum {expected_sum:?}, got {actual_sum:?} for {values:?}"
);
assert!(
actual_mean.semantic_eq(&expected_mean),
"case={case} family={family}: expected mean {expected_mean:?}, got {actual_mean:?} for {values:?}"
);
}
let all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
let (sum, mean) = expected_numeric(&all_missing);
assert_sum_mean(usize::MAX, "numeric_all_missing", &all_missing, sum, mean);
let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
let (sum, mean) = expected_timedelta(&td_all_missing);
assert_sum_mean(
usize::MAX - 1,
"timedelta_all_missing",
&td_all_missing,
sum,
mean,
);
let mut seed = 0x511d_ed5a_7a11_1a55_u64;
for case in 0..260 {
let len = (next(&mut seed) % 89 + 1) as usize;
let mut numeric = Vec::with_capacity(len);
numeric.push(Scalar::Int64(case as i64 - 130));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
numeric.push(match next(&mut seed) % 8 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 257),
5 => Scalar::Float64(raw as f64 / 67.0),
6 => Scalar::Float64(0.0),
_ => Scalar::Float64(-0.0),
});
}
let (sum, mean) = expected_numeric(&numeric);
assert_sum_mean(case, "numeric", &numeric, sum, mean);
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
let (sum, mean) = expected_timedelta(&timedeltas);
assert_sum_mean(case, "timedelta", &timedeltas, sum, mean);
}
}
#[test]
fn nannunique_merges_negative_zero_and_zero() {
let vals = vec![
Scalar::Float64(-0.0),
Scalar::Float64(0.0),
Scalar::Float64(1.0),
];
assert_eq!(super::nannunique(&vals), Scalar::Int64(2));
}
#[test]
fn nannunique_matches_scalar_bucket_oracle_elvbg() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(2862933555777941757)
.wrapping_add(3037000493);
*seed
}
fn same_bucket(left: &Scalar, right: &Scalar) -> bool {
match (left, right) {
(Scalar::Float64(left), Scalar::Float64(right)) => {
let left = if *left == 0.0 { 0.0 } else { *left };
let right = if *right == 0.0 { 0.0 } else { *right };
left.to_bits() == right.to_bits()
}
_ => left == right,
}
}
fn expected_nunique(values: &[Scalar]) -> i64 {
let mut seen = Vec::<Scalar>::new();
for value in values {
if value.is_missing() {
continue;
}
if !seen.iter().any(|existing| same_bucket(existing, value)) {
seen.push(value.clone());
}
}
seen.len() as i64
}
fn assert_nannunique(case: usize, values: &[Scalar]) {
assert_eq!(
super::nannunique(values),
Scalar::Int64(expected_nunique(values)),
"case={case}: nannunique mismatch for {values:?}"
);
}
assert_nannunique(
usize::MAX,
&[
Scalar::Float64(-0.0),
Scalar::Float64(0.0),
Scalar::Float64(f64::NAN),
Scalar::Null(NullKind::Null),
Scalar::Timedelta64(i64::MIN),
],
);
let mut seed = 0x0e1b_60d0_b5e7_u64;
for case in 0..320 {
let len = (next(&mut seed) % 97 + 1) as usize;
let mut values = Vec::with_capacity(len);
for pos in 0..len {
let raw = (next(&mut seed) % 1_001) as i64 - 500;
values.push(match next(&mut seed) % 11 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 37),
5 => Scalar::Float64(raw as f64 / 19.0),
6 => Scalar::Float64(0.0),
7 => Scalar::Float64(-0.0),
8 => Scalar::Utf8(format!("uniq_{}", pos % 13)),
9 => Scalar::Utf8(String::new()),
_ => Scalar::Timedelta64(raw % 41),
});
}
assert_nannunique(case, &values);
}
}
#[test]
fn nanmean_basic() {
let vals = vec![
Scalar::Float64(2.0),
Scalar::Null(NullKind::Null),
Scalar::Float64(4.0),
];
assert_eq!(super::nanmean(&vals), Scalar::Float64(3.0));
}
#[test]
fn nanmean_all_null_returns_nan() {
let vals = vec![Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
assert!(super::nanmean(&vals).is_missing());
}
#[test]
fn nansum_nanmean_timedelta64_preserves_dtype_620mj() {
let one_hour = 3_600 * 1_000_000_000_i64;
let vals = vec![
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(3 * one_hour),
];
assert_eq!(super::nansum(&vals), Scalar::Timedelta64(6 * one_hour));
assert_eq!(super::nanmean(&vals), Scalar::Timedelta64(2 * one_hour));
}
#[test]
fn nansum_nanmean_timedelta64_skips_nat_620mj() {
let one_hour = 3_600 * 1_000_000_000_i64;
let vals = vec![
Scalar::Timedelta64(Timedelta::NAT),
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(3 * one_hour),
Scalar::Timedelta64(Timedelta::NAT),
];
assert_eq!(super::nansum(&vals), Scalar::Timedelta64(4 * one_hour));
assert_eq!(super::nanmean(&vals), Scalar::Timedelta64(2 * one_hour));
}
#[test]
fn nansum_nanmean_mixed_timedelta_other_falls_back_620mj() {
let vals = vec![Scalar::Timedelta64(3600 * 1_000_000_000), Scalar::Int64(5)];
assert_eq!(super::nansum(&vals), Scalar::Float64(5.0));
}
#[test]
fn nancount_counts_non_missing() {
let vals = vec![
Scalar::Int64(1),
Scalar::Null(NullKind::Null),
Scalar::Float64(3.0),
];
assert_eq!(super::nancount(&vals), Scalar::Int64(2));
}
#[test]
fn nanany_nanall_nancount_match_scalar_oracle_zr2qg() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
*seed
}
fn truthy(value: &Scalar) -> Option<bool> {
if value.is_missing() {
return None;
}
match value {
Scalar::Bool(value) => Some(*value),
Scalar::Int64(value) => Some(*value != 0),
Scalar::Float64(value) => Some(*value != 0.0),
Scalar::Utf8(value) => Some(!value.is_empty()),
Scalar::Timedelta64(value) => Some(*value != 0),
_ => None,
}
}
fn assert_nanops(case: usize, values: &[Scalar]) {
let truth_values = values.iter().filter_map(truthy).collect::<Vec<_>>();
let expected_any = truth_values.iter().any(|value| *value);
let expected_all = !truth_values.iter().any(|value| !*value);
let expected_count = values.iter().filter(|value| !value.is_missing()).count() as i64;
assert_eq!(
super::nanany(values),
Scalar::Bool(expected_any),
"case={case}: nanany mismatch for {values:?}"
);
assert_eq!(
super::nanall(values),
Scalar::Bool(expected_all),
"case={case}: nanall mismatch for {values:?}"
);
assert_eq!(
super::nancount(values),
Scalar::Int64(expected_count),
"case={case}: nancount mismatch for {values:?}"
);
}
assert_nanops(
usize::MAX,
&[
Scalar::Null(NullKind::Null),
Scalar::Null(NullKind::NaN),
Scalar::Float64(f64::NAN),
Scalar::Timedelta64(i64::MIN),
],
);
let mut seed = 0x7a20_2f7e_5ca1_ab1e_u64;
for case in 0..320 {
let len = (next(&mut seed) % 89 + 1) as usize;
let mut values = Vec::with_capacity(len);
for pos in 0..len {
let raw = (next(&mut seed) % 10_001) as i64 - 5_000;
let value = match next(&mut seed) % 12 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Bool(false),
5 => Scalar::Int64(raw % 17),
6 => Scalar::Int64(0),
7 => Scalar::Float64(raw as f64 / 23.0),
8 => Scalar::Float64(0.0),
9 => Scalar::Utf8(if raw & 1 == 0 {
String::new()
} else {
format!("nanops_{case}_{pos}")
}),
10 => Scalar::Timedelta64(raw),
_ => Scalar::Timedelta64(0),
};
values.push(value);
}
assert_nanops(case, &values);
}
}
#[test]
fn nanmin_basic() {
let vals = vec![
Scalar::Float64(5.0),
Scalar::Null(NullKind::Null),
Scalar::Float64(2.0),
Scalar::Float64(8.0),
];
assert_eq!(super::nanmin(&vals), Scalar::Float64(2.0));
}
#[test]
fn nanmax_basic() {
let vals = vec![
Scalar::Float64(5.0),
Scalar::Float64(f64::NAN),
Scalar::Float64(8.0),
];
assert_eq!(super::nanmax(&vals), Scalar::Float64(8.0));
}
#[test]
fn nanmin_nanmax_empty_returns_nan() {
assert!(super::nanmin(&[]).is_missing());
assert!(super::nanmax(&[]).is_missing());
}
#[test]
fn nanmin_nanmax_match_same_family_oracle_vj7ds() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
*seed
}
fn family_cmp(left: &Scalar, right: &Scalar) -> std::cmp::Ordering {
match (left, right) {
(Scalar::Bool(left), Scalar::Bool(right)) => left.cmp(right),
(Scalar::Int64(left), Scalar::Int64(right)) => left.cmp(right),
(Scalar::Float64(left), Scalar::Float64(right)) => {
left.partial_cmp(right).expect("finite floats")
}
(Scalar::Utf8(left), Scalar::Utf8(right)) => left.cmp(right),
(Scalar::Timedelta64(left), Scalar::Timedelta64(right)) => left.cmp(right),
_ => panic!("mixed family in nanmin/nanmax oracle"),
}
}
fn assert_minmax(case: usize, family: &str, values: &[Scalar]) {
let present = values
.iter()
.filter(|value| !value.is_missing())
.cloned()
.collect::<Vec<_>>();
let actual_min = super::nanmin(values);
let actual_max = super::nanmax(values);
if present.is_empty() {
assert!(
actual_min.is_missing(),
"case={case} family={family}: expected missing min for {values:?}, got {actual_min:?}"
);
assert!(
actual_max.is_missing(),
"case={case} family={family}: expected missing max for {values:?}, got {actual_max:?}"
);
return;
}
let expected_min = present.iter().min_by(|left, right| family_cmp(left, right));
let expected_max = present.iter().max_by(|left, right| family_cmp(left, right));
assert!(
actual_min.semantic_eq(expected_min.expect("min")),
"case={case} family={family}: expected min {:?}, got {actual_min:?} for {values:?}",
expected_min.expect("min")
);
assert!(
actual_max.semantic_eq(expected_max.expect("max")),
"case={case} family={family}: expected max {:?}, got {actual_max:?} for {values:?}",
expected_max.expect("max")
);
}
let all_missing = [
Scalar::Null(NullKind::Null),
Scalar::Null(NullKind::NaN),
Scalar::Float64(f64::NAN),
Scalar::Timedelta64(i64::MIN),
];
assert_minmax(usize::MAX, "all_missing", &all_missing);
let mut seed = 0xa11c_0aba_2e7d_f00d_u64;
for case in 0..240 {
let len = (next(&mut seed) % 73 + 1) as usize;
let mut ints = Vec::with_capacity(len);
ints.push(Scalar::Int64(case as i64 - 120));
for _ in 1..len {
let raw = (next(&mut seed) % 1_001) as i64 - 500;
ints.push(match next(&mut seed) % 6 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
_ => Scalar::Int64(raw),
});
}
assert_minmax(case, "int", &ints);
let mut floats = Vec::with_capacity(len);
floats.push(Scalar::Float64(case as f64 / 11.0));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
floats.push(match next(&mut seed) % 8 {
0 | 1 => Scalar::Float64(f64::NAN),
2 => Scalar::Float64(f64::INFINITY),
3 => Scalar::Float64(f64::NEG_INFINITY),
4 => Scalar::Float64(0.0),
5 => Scalar::Float64(-0.0),
_ => Scalar::Float64(raw as f64 / 41.0),
});
}
assert_minmax(case, "float", &floats);
let mut bools = Vec::with_capacity(len);
bools.push(Scalar::Bool(case & 1 == 0));
for _ in 1..len {
bools.push(match next(&mut seed) % 5 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
raw => Scalar::Bool(raw & 1 == 0),
});
}
assert_minmax(case, "bool", &bools);
let mut utf8 = Vec::with_capacity(len);
utf8.push(Scalar::Utf8(format!("minmax_{}", case % 17)));
for pos in 1..len {
utf8.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
raw => Scalar::Utf8(format!("minmax_{}_{}", raw, pos % 11)),
});
}
assert_minmax(case, "utf8", &utf8);
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 120));
for _ in 1..len {
let raw = (next(&mut seed) % 1_003) as i64 - 501;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
assert_minmax(case, "timedelta", &timedeltas);
}
}
#[test]
fn nanmin_nanmax_mixed_incompatible_types_returns_nan() {
let vals = vec![Scalar::Int64(5), Scalar::Utf8("hello".into())];
assert!(super::nanmin(&vals).is_missing());
assert!(super::nanmax(&vals).is_missing());
let vals2 = vec![Scalar::Utf8("a".into()), Scalar::Float64(3.0)];
assert!(super::nanmin(&vals2).is_missing());
assert!(super::nanmax(&vals2).is_missing());
}
#[test]
fn nanmin_nanmax_compatible_numeric_types_ok() {
let vals = vec![Scalar::Int64(5), Scalar::Float64(3.0), Scalar::Bool(true)];
assert_eq!(super::nanmin(&vals), Scalar::Bool(true));
assert_eq!(super::nanmax(&vals), Scalar::Int64(5));
}
#[test]
fn nanmin_nanmax_timedelta64_returns_timedelta_yic5m() {
let one_hour = 3_600 * 1_000_000_000_i64;
let vals = vec![
Scalar::Timedelta64(3 * one_hour),
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(2 * one_hour),
];
assert_eq!(super::nanmin(&vals), Scalar::Timedelta64(one_hour));
assert_eq!(super::nanmax(&vals), Scalar::Timedelta64(3 * one_hour));
}
#[test]
fn nanmin_nanmax_timedelta64_skips_nat_yic5m() {
let one_hour = 3_600 * 1_000_000_000_i64;
let vals = vec![
Scalar::Timedelta64(Timedelta::NAT),
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(Timedelta::NAT),
];
assert_eq!(super::nanmin(&vals), Scalar::Timedelta64(one_hour));
assert_eq!(super::nanmax(&vals), Scalar::Timedelta64(2 * one_hour));
}
#[test]
fn nanmedian_odd_count() {
let vals = vec![
Scalar::Float64(3.0),
Scalar::Null(NullKind::Null),
Scalar::Float64(1.0),
Scalar::Float64(2.0),
];
assert_eq!(super::nanmedian(&vals), Scalar::Float64(2.0));
}
#[test]
fn nanmedian_even_count() {
let vals = vec![
Scalar::Float64(1.0),
Scalar::Float64(3.0),
Scalar::Float64(2.0),
Scalar::Float64(4.0),
];
assert_eq!(super::nanmedian(&vals), Scalar::Float64(2.5));
}
#[test]
fn nanmedian_matches_numeric_and_timedelta_oracle_oabhi() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
*seed
}
fn expected_numeric(values: &[Scalar]) -> Scalar {
let mut finite = values
.iter()
.filter(|value| !value.is_missing())
.filter_map(|value| value.to_f64().ok())
.filter(|value| !value.is_nan())
.collect::<Vec<_>>();
if finite.is_empty() {
return Scalar::Null(NullKind::NaN);
}
finite.sort_by(|left, right| left.partial_cmp(right).expect("finite values"));
let mid = finite.len() / 2;
if finite.len().is_multiple_of(2) {
Scalar::Float64((finite[mid - 1] + finite[mid]) / 2.0)
} else {
Scalar::Float64(finite[mid])
}
}
fn expected_timedelta(values: &[Scalar]) -> Scalar {
let mut finite = values
.iter()
.filter_map(|value| match value {
Scalar::Timedelta64(ns) if !value.is_missing() => Some(*ns as f64),
_ => None,
})
.collect::<Vec<_>>();
if finite.is_empty() {
return Scalar::Null(NullKind::NaN);
}
finite.sort_by(|left, right| left.partial_cmp(right).expect("finite values"));
let mid = finite.len() / 2;
let median = if finite.len().is_multiple_of(2) {
(finite[mid - 1] + finite[mid]) / 2.0
} else {
finite[mid]
};
Scalar::Timedelta64(median as i64)
}
fn assert_median(case: usize, family: &str, values: &[Scalar], expected: Scalar) {
let actual = super::nanmedian(values);
assert!(
actual.semantic_eq(&expected),
"case={case} family={family}: expected {expected:?}, got {actual:?} for {values:?}"
);
}
assert_median(
usize::MAX,
"numeric_all_missing",
&[Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)],
Scalar::Null(NullKind::NaN),
);
assert_median(
usize::MAX - 1,
"timedelta_all_missing",
&[Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)],
Scalar::Null(NullKind::NaN),
);
let mut seed = 0x0ab1_1eda_57a7_15e5_u64;
for case in 0..220 {
let len = (next(&mut seed) % 79 + 1) as usize;
let mut numeric = Vec::with_capacity(len);
numeric.push(Scalar::Int64(case as i64 - 110));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
numeric.push(match next(&mut seed) % 8 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 251),
5 => Scalar::Float64(raw as f64 / 61.0),
6 => Scalar::Float64(0.0),
_ => Scalar::Float64(-0.0),
});
}
assert_median(case, "numeric", &numeric, expected_numeric(&numeric));
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 110));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
assert_median(
case,
"timedelta",
&timedeltas,
expected_timedelta(&timedeltas),
);
}
}
#[test]
fn nanvar_nanstd_nansem_match_numeric_and_timedelta_oracle_k7apg() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(2862933555777941757)
.wrapping_add(3037000493);
*seed
}
fn numeric_samples(values: &[Scalar]) -> Vec<f64> {
values
.iter()
.filter(|value| !value.is_missing())
.filter_map(|value| value.to_f64().ok())
.collect()
}
fn timedelta_samples(values: &[Scalar]) -> Vec<f64> {
values
.iter()
.filter_map(|value| match value {
Scalar::Timedelta64(ns) if !value.is_missing() => Some(*ns as f64),
_ => None,
})
.collect()
}
fn reductions_from_samples(samples: &[f64], ddof: usize) -> Option<(f64, f64, f64)> {
if samples.len() <= ddof {
return None;
}
let mean = samples.iter().sum::<f64>() / samples.len() as f64;
let sum_sq = samples
.iter()
.map(|value| (value - mean).powi(2))
.sum::<f64>();
let var = sum_sq / (samples.len() - ddof) as f64;
let std = var.sqrt();
let sem = std / (samples.len() as f64).sqrt();
Some((var, std, sem))
}
fn expected_numeric(values: &[Scalar], ddof: usize) -> (Scalar, Scalar, Scalar) {
let samples = numeric_samples(values);
let Some((var, std, sem)) = reductions_from_samples(&samples, ddof) else {
let missing = Scalar::Null(NullKind::NaN);
return (missing.clone(), missing.clone(), missing);
};
(
Scalar::Float64(var),
Scalar::Float64(std),
Scalar::Float64(sem),
)
}
fn expected_timedelta(values: &[Scalar], ddof: usize) -> (Scalar, Scalar, Scalar) {
let samples = timedelta_samples(values);
if samples.is_empty() {
let missing = Scalar::Null(NullKind::NaN);
return (missing.clone(), missing.clone(), missing);
}
let Some((var, std, sem)) = reductions_from_samples(&samples, ddof) else {
let missing = Scalar::Timedelta64(i64::MIN);
return (missing.clone(), missing.clone(), missing);
};
(
Scalar::Timedelta64(var as i64),
Scalar::Timedelta64(std as i64),
Scalar::Timedelta64(sem as i64),
)
}
fn assert_reductions(
case: usize,
family: &str,
values: &[Scalar],
ddof: usize,
expected: (Scalar, Scalar, Scalar),
) {
let (expected_var, expected_std, expected_sem) = expected;
let actual_var = super::nanvar(values, ddof);
let actual_std = super::nanstd(values, ddof);
let actual_sem = super::nansem(values, ddof);
assert!(
actual_var.semantic_eq(&expected_var),
"case={case} family={family} ddof={ddof}: expected var {expected_var:?}, got {actual_var:?} for {values:?}"
);
assert!(
actual_std.semantic_eq(&expected_std),
"case={case} family={family} ddof={ddof}: expected std {expected_std:?}, got {actual_std:?} for {values:?}"
);
assert!(
actual_sem.semantic_eq(&expected_sem),
"case={case} family={family} ddof={ddof}: expected sem {expected_sem:?}, got {actual_sem:?} for {values:?}"
);
}
let numeric_all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
assert_reductions(
usize::MAX,
"numeric_all_missing",
&numeric_all_missing,
0,
expected_numeric(&numeric_all_missing, 0),
);
let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
assert_reductions(
usize::MAX - 1,
"timedelta_all_missing",
&td_all_missing,
0,
expected_timedelta(&td_all_missing, 0),
);
let mut seed = 0x7a11_c0de_5eed_0421_u64;
for case in 0..240 {
let len = (next(&mut seed) % 83 + 1) as usize;
let ddof = (next(&mut seed) % 4) as usize;
let mut numeric = Vec::with_capacity(len);
numeric.push(Scalar::Float64(case as f64 / 13.0));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
numeric.push(match next(&mut seed) % 8 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 251),
5 => Scalar::Float64(raw as f64 / 73.0),
6 => Scalar::Float64(0.0),
_ => Scalar::Float64(-0.0),
});
}
assert_reductions(
case,
"numeric",
&numeric,
ddof,
expected_numeric(&numeric, ddof),
);
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 120));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
assert_reductions(
case,
"timedelta",
&timedeltas,
ddof,
expected_timedelta(&timedeltas, ddof),
);
}
}
#[test]
fn nanvar_population() {
let vals = vec![
Scalar::Float64(2.0),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(5.0),
Scalar::Float64(5.0),
Scalar::Float64(7.0),
Scalar::Float64(9.0),
];
let var = super::nanvar(&vals, 0);
assert!(matches!(var, Scalar::Float64(_)), "expected Float64");
if let Scalar::Float64(v) = var {
assert!((v - 4.0).abs() < 1e-10);
}
}
#[test]
fn nanvar_sample_ddof1() {
let vals = vec![
Scalar::Float64(2.0),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(5.0),
Scalar::Float64(5.0),
Scalar::Float64(7.0),
Scalar::Float64(9.0),
];
let var = super::nanvar(&vals, 1);
assert!(matches!(var, Scalar::Float64(_)), "expected Float64");
if let Scalar::Float64(v) = var {
assert!((v - 32.0 / 7.0).abs() < 1e-10);
}
}
#[test]
fn nanvar_insufficient_values_returns_nan() {
let vals = vec![Scalar::Float64(5.0)];
assert!(super::nanvar(&vals, 1).is_missing());
}
#[test]
fn nanstd_is_sqrt_of_var() {
let vals = vec![
Scalar::Float64(2.0),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(5.0),
Scalar::Float64(5.0),
Scalar::Float64(7.0),
Scalar::Float64(9.0),
];
let std = super::nanstd(&vals, 0);
assert!(matches!(std, Scalar::Float64(_)), "expected Float64");
if let Scalar::Float64(v) = std {
assert!((v - 2.0).abs() < 1e-10);
}
}
#[test]
fn nanmedian_timedelta64_preserves_dtype_j8ntk() {
let one_hour = 3_600 * 1_000_000_000_i64;
let vals = vec![
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(3 * one_hour),
];
assert_eq!(super::nanmedian(&vals), Scalar::Timedelta64(2 * one_hour));
}
#[test]
fn nanstd_timedelta64_preserves_dtype_j8ntk() {
let one_hour: i64 = 3_600 * 1_000_000_000;
let vals = vec![
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(3 * one_hour),
];
let std = super::nanstd(&vals, 0);
match std {
Scalar::Timedelta64(ns) => {
let expected = (2.0_f64 / 3.0).sqrt() * one_hour as f64;
assert!(
(ns as f64 - expected).abs() < 1e6,
"expected ~{expected} ns, got {ns}"
);
}
other => panic!("expected Timedelta64, got {other:?}"),
}
}
#[test]
fn nanstd_nansem_timedelta64_insufficient_returns_nat_j8ntk() {
let one_hour = 3_600 * 1_000_000_000_i64;
let vals = vec![Scalar::Timedelta64(one_hour)];
match super::nanstd(&vals, 1) {
Scalar::Timedelta64(v) => assert_eq!(v, Timedelta::NAT),
other => panic!("expected Timedelta64 NAT, got {other:?}"),
}
match super::nansem(&vals, 1) {
Scalar::Timedelta64(v) => assert_eq!(v, Timedelta::NAT),
other => panic!("expected Timedelta64 NAT, got {other:?}"),
}
}
#[test]
fn nanops_with_mixed_types() {
let vals = vec![
Scalar::Bool(true),
Scalar::Int64(3),
Scalar::Float64(6.0),
Scalar::Null(NullKind::Null),
];
assert_eq!(super::nansum(&vals), Scalar::Float64(10.0));
assert_eq!(super::nancount(&vals), Scalar::Int64(3));
}
#[test]
fn nanops_all_missing_returns_identity() {
let vals = vec![Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
assert_eq!(super::nansum(&vals), Scalar::Float64(0.0));
assert!(super::nanmean(&vals).is_missing());
assert!(super::nanmedian(&vals).is_missing());
assert!(super::nanvar(&vals, 0).is_missing());
assert!(super::nanstd(&vals, 0).is_missing());
}
#[test]
fn timedelta_parse_simple_units() {
use super::Timedelta;
assert_eq!(Timedelta::parse("1d").unwrap(), Timedelta::NANOS_PER_DAY);
assert_eq!(
Timedelta::parse("2h").unwrap(),
2 * Timedelta::NANOS_PER_HOUR
);
assert_eq!(
Timedelta::parse("30m").unwrap(),
30 * Timedelta::NANOS_PER_MIN
);
assert_eq!(
Timedelta::parse("45s").unwrap(),
45 * Timedelta::NANOS_PER_SEC
);
assert_eq!(
Timedelta::parse("100ms").unwrap(),
100 * Timedelta::NANOS_PER_MILLI
);
assert_eq!(
Timedelta::parse("500us").unwrap(),
500 * Timedelta::NANOS_PER_MICRO
);
assert_eq!(Timedelta::parse("1000ns").unwrap(), 1000);
}
#[test]
fn timedelta_parse_compound() {
use super::Timedelta;
let expected = Timedelta::NANOS_PER_DAY
+ 2 * Timedelta::NANOS_PER_HOUR
+ 30 * Timedelta::NANOS_PER_MIN;
assert_eq!(Timedelta::parse("1d 2h 30m").unwrap(), expected);
assert_eq!(Timedelta::parse("1d2h30m").unwrap(), expected);
}
#[test]
fn timedelta_parse_iso8601_matches_pandas_tdiso() {
use super::Timedelta;
assert_eq!(Timedelta::parse("P1DT2H3M4S").unwrap(), 93_784_000_000_000);
assert_eq!(Timedelta::parse("PT1H").unwrap(), 3_600_000_000_000);
assert_eq!(Timedelta::parse("PT1H30M").unwrap(), 5_400_000_000_000);
assert_eq!(Timedelta::parse("P1D").unwrap(), 86_400_000_000_000);
assert_eq!(Timedelta::parse("P2W").unwrap(), 1_209_600_000_000_000);
assert_eq!(Timedelta::parse("PT0.5S").unwrap(), 500_000_000);
assert_eq!(Timedelta::parse("P1M").unwrap(), 60_000_000_000);
assert_eq!(Timedelta::parse("P1H").unwrap(), 3_600_000_000_000);
assert_eq!(Timedelta::parse("PT1D").unwrap(), 86_400_000_000_000);
assert_eq!(Timedelta::parse("P1D1H").unwrap(), 90_000_000_000_000);
assert_eq!(Timedelta::parse("-P1DT2H").unwrap(), -93_600_000_000_000);
assert!(Timedelta::parse("P1Y").is_err());
assert!(Timedelta::parse("p1d").is_err());
assert!(Timedelta::parse("P").is_err());
assert!(Timedelta::parse("PT").is_err());
}
#[test]
fn timedelta_parse_time_format() {
use super::Timedelta;
let expected = Timedelta::NANOS_PER_HOUR
+ 30 * Timedelta::NANOS_PER_MIN
+ 45 * Timedelta::NANOS_PER_SEC;
assert_eq!(Timedelta::parse("01:30:45").unwrap(), expected);
}
#[test]
fn timedelta_parse_time_fraction_rejects_unicode_without_panic() {
use super::{Timedelta, TimedeltaError};
let err = Timedelta::parse("00:00:00.\u{00e9}\u{00e9}\u{00e9}\u{00e9}\u{00e9}")
.expect_err("non-ASCII fractional seconds must reject");
assert!(matches!(err, TimedeltaError::InvalidFormat(_)));
}
#[test]
fn timedelta_parse_time_format_rejects_overflow_without_panic() {
use super::{Timedelta, TimedeltaError};
let err = Timedelta::parse("9223372036854775807:00")
.expect_err("oversized hour component must reject");
assert!(matches!(err, TimedeltaError::InvalidFormat(_)));
}
#[test]
fn timedelta_parse_rejects_huge_value_overflow_zw3mg() {
use super::{Timedelta, TimedeltaError};
let huge = format!("{} days", "9".repeat(18));
assert!(matches!(
Timedelta::parse(&huge).expect_err("9...(18 9s) days must overflow"),
TimedeltaError::Overflow
));
}
#[test]
fn timedelta_parse_nat() {
use super::Timedelta;
assert_eq!(Timedelta::parse("NaT").unwrap(), Timedelta::NAT);
assert_eq!(Timedelta::parse("nat").unwrap(), Timedelta::NAT);
}
#[test]
fn timedelta_parse_negative() {
use super::Timedelta;
assert_eq!(Timedelta::parse("-1d").unwrap(), -Timedelta::NANOS_PER_DAY);
}
#[test]
fn timedelta_components() {
use super::Timedelta;
let nanos = Timedelta::NANOS_PER_DAY
+ Timedelta::NANOS_PER_HOUR
+ Timedelta::NANOS_PER_MIN
+ Timedelta::NANOS_PER_SEC
+ Timedelta::NANOS_PER_MILLI
+ 2 * Timedelta::NANOS_PER_MICRO
+ 3;
let comp = Timedelta::components(nanos);
assert_eq!(comp.days, 1);
assert_eq!(comp.hours, 1);
assert_eq!(comp.minutes, 1);
assert_eq!(comp.seconds, 1);
assert_eq!(comp.milliseconds, 1);
assert_eq!(comp.microseconds, 2);
assert_eq!(comp.nanoseconds, 3);
}
#[test]
fn timedelta_negative_components_floor_div() {
use super::Timedelta;
let neg_1s = -Timedelta::NANOS_PER_SEC;
assert_eq!(Timedelta::days(neg_1s), -1);
assert_eq!(Timedelta::seconds(neg_1s), 86399);
assert_eq!(Timedelta::microseconds(neg_1s), 0);
assert_eq!(Timedelta::nanoseconds(neg_1s), 0);
let comp = Timedelta::components(neg_1s);
assert_eq!(
(
comp.days,
comp.hours,
comp.minutes,
comp.seconds,
comp.milliseconds,
comp.microseconds,
comp.nanoseconds
),
(-1, 23, 59, 59, 0, 0, 0)
);
let neg = -86_401 * Timedelta::NANOS_PER_SEC;
assert_eq!(Timedelta::days(neg), -2);
assert_eq!(Timedelta::seconds(neg), 86399);
}
#[test]
fn timedelta_total_seconds() {
use super::Timedelta;
let nanos = 90_000_000_000i64; assert!((Timedelta::total_seconds(nanos) - 90.0).abs() < 1e-9);
assert!(Timedelta::total_seconds(Timedelta::NAT).is_nan());
}
#[test]
fn timedelta_format_basic() {
use super::Timedelta;
assert_eq!(Timedelta::format(Timedelta::NAT), "NaT");
assert_eq!(
Timedelta::format(Timedelta::NANOS_PER_DAY),
"1 days 00:00:00"
);
assert_eq!(
Timedelta::format(Timedelta::NANOS_PER_DAY + 2 * Timedelta::NANOS_PER_HOUR),
"1 days 02:00:00"
);
}
#[test]
fn timedelta_format_subsecond_matches_pandas() {
use super::Timedelta;
assert_eq!(
Timedelta::format(1_500_000_000), "0 days 00:00:01.500000"
);
assert_eq!(
Timedelta::format(1_000_000), "0 days 00:00:00.001000"
);
assert_eq!(
Timedelta::format(123_456_000), "0 days 00:00:00.123456"
);
assert_eq!(
Timedelta::format(500), "0 days 00:00:00.000000500"
);
assert_eq!(Timedelta::format(123_456_789), "0 days 00:00:00.123456789");
}
#[test]
fn timedelta_format_negative_uses_python_borrow_form() {
use super::Timedelta;
assert_eq!(Timedelta::format(-1_000_000_000), "-1 days +23:59:59");
assert_eq!(
Timedelta::format(-Timedelta::NANOS_PER_DAY),
"-1 days +00:00:00"
);
assert_eq!(
Timedelta::format(-25 * Timedelta::NANOS_PER_HOUR),
"-2 days +23:00:00"
);
assert_eq!(
Timedelta::format(-1_500_000_000),
"-1 days +23:59:58.500000"
);
assert_eq!(Timedelta::format(-500), "-1 days +23:59:59.999999500");
assert_eq!(Timedelta::format(-1), "-1 days +23:59:59.999999999");
}
#[test]
fn timedelta_isoformat_basic() {
use super::Timedelta;
assert_eq!(Timedelta::isoformat(Timedelta::NAT), "NaT");
assert_eq!(Timedelta::isoformat(0), "P0DT0H0M0S");
assert_eq!(Timedelta::isoformat(Timedelta::NANOS_PER_DAY), "P1DT0H0M0S");
assert_eq!(
Timedelta::isoformat(
Timedelta::NANOS_PER_DAY
+ 2 * Timedelta::NANOS_PER_HOUR
+ 30 * Timedelta::NANOS_PER_MIN
+ 45 * Timedelta::NANOS_PER_SEC
),
"P1DT2H30M45S"
);
assert_eq!(
Timedelta::isoformat(Timedelta::NANOS_PER_SEC + 500_000_000),
"P0DT0H0M1.5S"
);
assert_eq!(
Timedelta::isoformat(-(Timedelta::NANOS_PER_DAY + Timedelta::NANOS_PER_HOUR)),
"-P1DT1H0M0S"
);
}
#[test]
fn timedelta_floor_ceil_round() {
use super::Timedelta;
let nanos = Timedelta::NANOS_PER_HOUR + 30 * Timedelta::NANOS_PER_MIN;
assert_eq!(Timedelta::floor(nanos, "h"), Timedelta::NANOS_PER_HOUR);
assert_eq!(Timedelta::floor(nanos, "d"), 0);
assert_eq!(Timedelta::ceil(nanos, "h"), 2 * Timedelta::NANOS_PER_HOUR);
assert_eq!(Timedelta::ceil(nanos, "d"), Timedelta::NANOS_PER_DAY);
assert_eq!(Timedelta::round(nanos, "h"), 2 * Timedelta::NANOS_PER_HOUR);
assert_eq!(Timedelta::floor(Timedelta::NAT, "h"), Timedelta::NAT);
assert_eq!(Timedelta::ceil(Timedelta::NAT, "h"), Timedelta::NAT);
assert_eq!(Timedelta::round(Timedelta::NAT, "h"), Timedelta::NAT);
assert_eq!(Timedelta::floor(nanos, "invalid"), Timedelta::NAT);
}
#[test]
fn timedelta_floor_ceil_negative_use_euclidean_rounding_t79yh() {
use super::Timedelta;
assert_eq!(
Timedelta::floor(-1, "s"),
-Timedelta::NANOS_PER_SEC,
"floor(-1ns, 1s)"
);
assert_eq!(Timedelta::ceil(-1, "s"), 0, "ceil(-1ns, 1s)");
assert_eq!(
Timedelta::floor(-1_500_000_000, "s"),
-2 * Timedelta::NANOS_PER_SEC
);
assert_eq!(
Timedelta::ceil(-1_500_000_000, "s"),
-Timedelta::NANOS_PER_SEC
);
assert_eq!(
Timedelta::floor(-Timedelta::NANOS_PER_SEC, "s"),
-Timedelta::NANOS_PER_SEC
);
assert_eq!(
Timedelta::ceil(-Timedelta::NANOS_PER_SEC, "s"),
-Timedelta::NANOS_PER_SEC
);
assert_eq!(
Timedelta::floor(1_500_000_000, "s"),
Timedelta::NANOS_PER_SEC
);
assert_eq!(
Timedelta::ceil(1_500_000_000, "s"),
2 * Timedelta::NANOS_PER_SEC
);
}
#[test]
fn timedelta_scalar_dtype() {
let td = Scalar::Timedelta64(86_400_000_000_000);
assert_eq!(td.dtype(), DType::Timedelta64);
}
#[test]
fn timedelta_scalar_is_missing() {
use super::Timedelta;
let valid = Scalar::Timedelta64(1000);
let nat = Scalar::Timedelta64(Timedelta::NAT);
assert!(!valid.is_missing());
assert!(nat.is_missing());
}
#[test]
fn dtype_utf8_deserializes_legacy_aliases() {
let dtype: DType = serde_json::from_str("\"str\"").unwrap();
assert_eq!(dtype, DType::Utf8);
let dtype: DType = serde_json::from_str("\"string\"").unwrap();
assert_eq!(dtype, DType::Utf8);
}
#[test]
fn scalar_utf8_deserializes_legacy_aliases() {
let scalar: Scalar = serde_json::from_str(r#"{"kind":"str","value":"x"}"#).unwrap();
assert_eq!(scalar, Scalar::Utf8("x".to_owned()));
let scalar: Scalar = serde_json::from_str(r#"{"kind":"string","value":"y"}"#).unwrap();
assert_eq!(scalar, Scalar::Utf8("y".to_owned()));
}
#[test]
fn nancumsum_skips_nulls_and_accumulates() {
let values = vec![
Scalar::Float64(1.0),
Scalar::Null(NullKind::NaN),
Scalar::Float64(2.0),
Scalar::Float64(3.0),
];
let out = super::nancumsum(&values);
assert!(matches!(out[0], Scalar::Float64(v) if (v - 1.0).abs() < 1e-9));
assert!(out[1].is_missing());
assert!(matches!(out[2], Scalar::Float64(v) if (v - 3.0).abs() < 1e-9));
assert!(matches!(out[3], Scalar::Float64(v) if (v - 6.0).abs() < 1e-9));
}
#[test]
fn nancumprod_skips_nulls_and_multiplies() {
let values = vec![
Scalar::Float64(2.0),
Scalar::Null(NullKind::NaN),
Scalar::Float64(3.0),
Scalar::Float64(4.0),
];
let out = super::nancumprod(&values);
assert!(matches!(out[0], Scalar::Float64(v) if (v - 2.0).abs() < 1e-9));
assert!(out[1].is_missing());
assert!(matches!(out[2], Scalar::Float64(v) if (v - 6.0).abs() < 1e-9));
assert!(matches!(out[3], Scalar::Float64(v) if (v - 24.0).abs() < 1e-9));
}
#[test]
fn nancummax_tracks_running_max() {
let values = vec![
Scalar::Float64(1.0),
Scalar::Float64(3.0),
Scalar::Null(NullKind::NaN),
Scalar::Float64(2.0),
Scalar::Float64(5.0),
];
let out = super::nancummax(&values);
assert_eq!(out[0], Scalar::Float64(1.0));
assert_eq!(out[1], Scalar::Float64(3.0));
assert!(out[2].is_missing());
assert_eq!(out[3], Scalar::Float64(3.0));
assert_eq!(out[4], Scalar::Float64(5.0));
}
#[test]
fn nancummin_tracks_running_min() {
let values = vec![
Scalar::Float64(5.0),
Scalar::Float64(3.0),
Scalar::Null(NullKind::NaN),
Scalar::Float64(4.0),
Scalar::Float64(1.0),
];
let out = super::nancummin(&values);
assert_eq!(out[0], Scalar::Float64(5.0));
assert_eq!(out[1], Scalar::Float64(3.0));
assert!(out[2].is_missing());
assert_eq!(out[3], Scalar::Float64(3.0));
assert_eq!(out[4], Scalar::Float64(1.0));
}
#[test]
fn nancumsum_timedelta64_preserves_dtype_x0x91() {
let one_hour = 3_600 * 1_000_000_000_i64;
let values = vec![
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(3 * one_hour),
];
let out = super::nancumsum(&values);
assert_eq!(out[0], Scalar::Timedelta64(one_hour));
assert_eq!(out[1], Scalar::Timedelta64(3 * one_hour));
assert_eq!(out[2], Scalar::Timedelta64(6 * one_hour));
}
#[test]
fn nancummax_nancummin_timedelta64_preserves_dtype_x0x91() {
let one_hour = 3_600 * 1_000_000_000_i64;
let values = vec![
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(5 * one_hour),
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(3 * one_hour),
];
let mx = super::nancummax(&values);
assert_eq!(mx[0], Scalar::Timedelta64(2 * one_hour));
assert_eq!(mx[1], Scalar::Timedelta64(5 * one_hour));
assert_eq!(mx[2], Scalar::Timedelta64(5 * one_hour));
assert_eq!(mx[3], Scalar::Timedelta64(5 * one_hour));
let mn = super::nancummin(&values);
assert_eq!(mn[0], Scalar::Timedelta64(2 * one_hour));
assert_eq!(mn[1], Scalar::Timedelta64(2 * one_hour));
assert_eq!(mn[2], Scalar::Timedelta64(one_hour));
assert_eq!(mn[3], Scalar::Timedelta64(one_hour));
}
#[test]
fn nancumulative_timedelta64_skips_nat_x0x91() {
let one_hour = 3_600 * 1_000_000_000_i64;
let values = vec![
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(Timedelta::NAT),
Scalar::Timedelta64(2 * one_hour),
];
let cs = super::nancumsum(&values);
assert_eq!(cs[0], Scalar::Timedelta64(one_hour));
assert!(cs[1].is_missing());
assert_eq!(cs[2], Scalar::Timedelta64(3 * one_hour));
}
#[test]
fn nancumulative_matches_numeric_and_timedelta_oracle_k63oz() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(3202034522624059733)
.wrapping_add(4354685564936845319);
*seed
}
fn assert_vec(case: usize, family: &str, op: &str, actual: &[Scalar], expected: &[Scalar]) {
assert_eq!(
actual.len(),
expected.len(),
"case={case} family={family} op={op}: length mismatch"
);
for (pos, (actual, expected)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
actual.semantic_eq(expected),
"case={case} family={family} op={op} pos={pos}: expected {expected:?}, got {actual:?}"
);
}
}
fn expected_numeric(
values: &[Scalar],
) -> (Vec<Scalar>, Vec<Scalar>, Vec<Scalar>, Vec<Scalar>) {
let mut sum = Vec::with_capacity(values.len());
let mut prod = Vec::with_capacity(values.len());
let mut max = Vec::with_capacity(values.len());
let mut min = Vec::with_capacity(values.len());
let mut running_sum = 0.0_f64;
let mut running_prod = 1.0_f64;
let mut running_max: Option<f64> = None;
let mut running_min: Option<f64> = None;
for value in values {
if value.is_missing() {
sum.push(Scalar::Null(NullKind::NaN));
prod.push(Scalar::Null(NullKind::NaN));
max.push(Scalar::Null(NullKind::NaN));
min.push(Scalar::Null(NullKind::NaN));
continue;
}
let Ok(value) = value.to_f64() else {
sum.push(Scalar::Null(NullKind::NaN));
prod.push(Scalar::Null(NullKind::NaN));
max.push(Scalar::Null(NullKind::NaN));
min.push(Scalar::Null(NullKind::NaN));
continue;
};
if value.is_nan() {
sum.push(Scalar::Null(NullKind::NaN));
prod.push(Scalar::Null(NullKind::NaN));
max.push(Scalar::Null(NullKind::NaN));
min.push(Scalar::Null(NullKind::NaN));
continue;
}
running_sum += value;
running_prod *= value;
running_max = Some(running_max.map_or(value, |current| current.max(value)));
running_min = Some(running_min.map_or(value, |current| current.min(value)));
sum.push(Scalar::Float64(running_sum));
prod.push(Scalar::Float64(running_prod));
max.push(Scalar::Float64(running_max.expect("initialized")));
min.push(Scalar::Float64(running_min.expect("initialized")));
}
(sum, prod, max, min)
}
fn expected_timedelta(values: &[Scalar]) -> (Vec<Scalar>, Vec<Scalar>, Vec<Scalar>) {
let mut sum = Vec::with_capacity(values.len());
let mut max = Vec::with_capacity(values.len());
let mut min = Vec::with_capacity(values.len());
let mut running_sum = 0_i128;
let mut running_max: Option<i64> = None;
let mut running_min: Option<i64> = None;
for value in values {
if value.is_missing() {
sum.push(Scalar::Null(NullKind::NaT));
max.push(Scalar::Null(NullKind::NaT));
min.push(Scalar::Null(NullKind::NaT));
continue;
}
let Scalar::Timedelta64(ns) = value else {
sum.push(Scalar::Null(NullKind::NaT));
max.push(Scalar::Null(NullKind::NaT));
min.push(Scalar::Null(NullKind::NaT));
continue;
};
running_sum = running_sum.saturating_add(i128::from(*ns));
let clamped = running_sum.clamp(i128::from(i64::MIN), i128::from(i64::MAX));
running_max = Some(running_max.map_or(*ns, |current| current.max(*ns)));
running_min = Some(running_min.map_or(*ns, |current| current.min(*ns)));
sum.push(Scalar::Timedelta64(clamped as i64));
max.push(Scalar::Timedelta64(running_max.expect("initialized")));
min.push(Scalar::Timedelta64(running_min.expect("initialized")));
}
(sum, max, min)
}
let mut seed = 0xc0de_c63a_5eed_0421_u64;
for case in 0..260 {
let len = (next(&mut seed) % 89 + 1) as usize;
let mut numeric = Vec::with_capacity(len);
numeric.push(Scalar::Int64(case as i64 - 130));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
numeric.push(match next(&mut seed) % 8 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 251),
5 => Scalar::Float64(raw as f64 / 79.0),
6 => Scalar::Float64(0.0),
_ => Scalar::Float64(-0.0),
});
}
let (sum, prod, max, min) = expected_numeric(&numeric);
assert_vec(case, "numeric", "cumsum", &super::nancumsum(&numeric), &sum);
assert_vec(
case,
"numeric",
"cumprod",
&super::nancumprod(&numeric),
&prod,
);
assert_vec(case, "numeric", "cummax", &super::nancummax(&numeric), &max);
assert_vec(case, "numeric", "cummin", &super::nancummin(&numeric), &min);
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
let (td_sum, td_max, td_min) = expected_timedelta(&timedeltas);
assert_vec(
case,
"timedelta",
"cumsum",
&super::nancumsum(&timedeltas),
&td_sum,
);
assert_vec(
case,
"timedelta",
"cummax",
&super::nancummax(&timedeltas),
&td_max,
);
assert_vec(
case,
"timedelta",
"cummin",
&super::nancummin(&timedeltas),
&td_min,
);
}
}
#[test]
fn nanquantile_linear_interpolation_matches_numpy() {
let values = vec![
Scalar::Float64(1.0),
Scalar::Float64(2.0),
Scalar::Float64(3.0),
Scalar::Float64(4.0),
Scalar::Float64(5.0),
];
let q = super::nanquantile(&values, 0.5);
assert!(matches!(q, Scalar::Float64(v) if (v - 3.0).abs() < 1e-9));
let q25 = super::nanquantile(&values, 0.25);
assert!(matches!(q25, Scalar::Float64(v) if (v - 2.0).abs() < 1e-9));
}
#[test]
fn nanquantile_ignores_nulls() {
let values = vec![
Scalar::Float64(1.0),
Scalar::Null(NullKind::NaN),
Scalar::Float64(3.0),
];
let q = super::nanquantile(&values, 0.5);
assert!(matches!(q, Scalar::Float64(v) if (v - 2.0).abs() < 1e-9));
}
#[test]
fn nanquantile_empty_and_out_of_range_yield_null() {
assert!(super::nanquantile(&[], 0.5).is_missing());
assert!(super::nanquantile(&[Scalar::Float64(1.0)], 1.5).is_missing());
assert!(super::nanquantile(&[Scalar::Float64(1.0)], -0.1).is_missing());
}
#[test]
fn nanquantile_timedelta64_preserves_dtype_5djk7() {
let one_hour: i64 = 3_600 * 1_000_000_000;
let vals = vec![
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(3 * one_hour),
Scalar::Timedelta64(4 * one_hour),
Scalar::Timedelta64(5 * one_hour),
];
assert_eq!(
super::nanquantile(&vals, 0.5),
Scalar::Timedelta64(3 * one_hour)
);
assert_eq!(
super::nanquantile(&vals, 0.0),
Scalar::Timedelta64(one_hour)
);
assert_eq!(
super::nanquantile(&vals, 1.0),
Scalar::Timedelta64(5 * one_hour)
);
}
#[test]
fn nanquantile_timedelta64_linear_interpolation_5djk7() {
let one_hour: i64 = 3_600 * 1_000_000_000;
let vals = vec![
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(3 * one_hour),
];
assert_eq!(
super::nanquantile(&vals, 0.5),
Scalar::Timedelta64(2 * one_hour)
);
}
#[test]
fn nanquantile_matches_numeric_and_timedelta_oracle_ecb7r() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(3202034522624059733)
.wrapping_add(4354685564936845319);
*seed
}
fn interpolated(sorted: &[f64], q: f64) -> f64 {
if sorted.len() == 1 {
return sorted[0];
}
let pos = q * (sorted.len() - 1) as f64;
let lo = pos.floor() as usize;
let hi = pos.ceil() as usize;
if lo == hi {
sorted[lo]
} else {
let weight = pos - lo as f64;
sorted[lo] + (sorted[hi] - sorted[lo]) * weight
}
}
fn expected_numeric(values: &[Scalar], q: f64) -> Scalar {
if !(0.0..=1.0).contains(&q) {
return Scalar::Null(NullKind::NaN);
}
let mut samples = values
.iter()
.filter(|value| !value.is_missing())
.filter_map(|value| value.to_f64().ok())
.collect::<Vec<_>>();
if samples.is_empty() {
return Scalar::Null(NullKind::NaN);
}
samples.sort_by(|left, right| left.partial_cmp(right).expect("finite values"));
Scalar::Float64(interpolated(&samples, q))
}
fn expected_timedelta(values: &[Scalar], q: f64) -> Scalar {
if !(0.0..=1.0).contains(&q) {
return Scalar::Null(NullKind::NaN);
}
let mut samples = values
.iter()
.filter_map(|value| match value {
Scalar::Timedelta64(ns) if !value.is_missing() => Some(*ns as f64),
_ => None,
})
.collect::<Vec<_>>();
if samples.is_empty() {
return Scalar::Null(NullKind::NaN);
}
samples.sort_by(|left, right| left.partial_cmp(right).expect("finite values"));
Scalar::Timedelta64(interpolated(&samples, q) as i64)
}
fn assert_quantile(case: usize, family: &str, values: &[Scalar], q: f64, expected: Scalar) {
let actual = super::nanquantile(values, q);
assert!(
actual.semantic_eq(&expected),
"case={case} family={family} q={q}: expected {expected:?}, got {actual:?} for {values:?}"
);
}
let numeric_all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
assert_quantile(
usize::MAX,
"numeric_all_missing",
&numeric_all_missing,
0.5,
expected_numeric(&numeric_all_missing, 0.5),
);
assert_quantile(
usize::MAX - 1,
"numeric_out_of_range",
&[Scalar::Float64(1.0), Scalar::Float64(2.0)],
1.25,
Scalar::Null(NullKind::NaN),
);
let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
assert_quantile(
usize::MAX - 2,
"timedelta_all_missing",
&td_all_missing,
0.5,
expected_timedelta(&td_all_missing, 0.5),
);
assert_quantile(
usize::MAX - 3,
"timedelta_out_of_range",
&[Scalar::Timedelta64(1), Scalar::Timedelta64(2)],
-0.25,
Scalar::Null(NullKind::NaN),
);
let mut seed = 0x4a17_1e5e_0b5e_a11d_u64;
for case in 0..260 {
let len = (next(&mut seed) % 83 + 1) as usize;
let q = match next(&mut seed) % 8 {
0 => 0.0,
1 => 0.25,
2 => 0.5,
3 => 0.75,
4 => 1.0,
_ => (next(&mut seed) % 1_001) as f64 / 1_000.0,
};
let mut numeric = Vec::with_capacity(len);
numeric.push(Scalar::Int64(case as i64 - 130));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
numeric.push(match next(&mut seed) % 8 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 251),
5 => Scalar::Float64(raw as f64 / 67.0),
6 => Scalar::Float64(0.0),
_ => Scalar::Float64(-0.0),
});
}
assert_quantile(case, "numeric", &numeric, q, expected_numeric(&numeric, q));
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
assert_quantile(
case,
"timedelta",
&timedeltas,
q,
expected_timedelta(&timedeltas, q),
);
}
}
#[test]
fn nanargmax_returns_first_position() {
let values = vec![
Scalar::Float64(1.0),
Scalar::Null(NullKind::NaN),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(2.0),
];
assert_eq!(super::nanargmax(&values), Some(2));
}
#[test]
fn nanargmin_returns_first_position() {
let values = vec![
Scalar::Float64(3.0),
Scalar::Null(NullKind::NaN),
Scalar::Float64(1.0),
Scalar::Float64(1.0),
];
assert_eq!(super::nanargmin(&values), Some(2));
}
#[test]
fn nanargmax_all_missing_returns_none() {
let values = vec![Scalar::Null(NullKind::NaN), Scalar::Null(NullKind::Null)];
assert_eq!(super::nanargmax(&values), None);
assert_eq!(super::nanargmin(&values), None);
}
#[test]
fn nanargmax_nanargmin_match_numeric_and_timedelta_oracle_unkj6() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(2862933555777941757)
.wrapping_add(3037000493);
*seed
}
fn expected_numeric(values: &[Scalar], find_max: bool) -> Option<usize> {
let mut best: Option<(usize, f64)> = None;
for (idx, value) in values.iter().enumerate() {
if value.is_missing() {
continue;
}
let Ok(value) = value.to_f64() else {
continue;
};
if value.is_nan() {
continue;
}
match best {
None => best = Some((idx, value)),
Some((_, current))
if (find_max && value > current) || (!find_max && value < current) =>
{
best = Some((idx, value));
}
_ => {}
}
}
best.map(|(idx, _)| idx)
}
fn expected_timedelta(values: &[Scalar], find_max: bool) -> Option<usize> {
let mut best: Option<(usize, i64)> = None;
for (idx, value) in values.iter().enumerate() {
if value.is_missing() {
continue;
}
let Scalar::Timedelta64(ns) = value else {
continue;
};
match best {
None => best = Some((idx, *ns)),
Some((_, current))
if (find_max && *ns > current) || (!find_max && *ns < current) =>
{
best = Some((idx, *ns));
}
_ => {}
}
}
best.map(|(idx, _)| idx)
}
fn assert_args(
case: usize,
family: &str,
values: &[Scalar],
expected_min: Option<usize>,
expected_max: Option<usize>,
) {
assert_eq!(
super::nanargmin(values),
expected_min,
"case={case} family={family}: nanargmin mismatch for {values:?}"
);
assert_eq!(
super::nanargmax(values),
expected_max,
"case={case} family={family}: nanargmax mismatch for {values:?}"
);
}
let all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
assert_args(usize::MAX, "numeric_all_missing", &all_missing, None, None);
let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
assert_args(
usize::MAX - 1,
"timedelta_all_missing",
&td_all_missing,
None,
None,
);
let mut seed = 0xa126_5eed_ed9e_u64;
for case in 0..260 {
let len = (next(&mut seed) % 83 + 1) as usize;
let mut numeric = Vec::with_capacity(len);
numeric.push(Scalar::Int64(case as i64 - 130));
if len > 1 {
numeric.push(Scalar::Int64(case as i64 - 130));
}
for _ in numeric.len()..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
numeric.push(match next(&mut seed) % 9 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 211),
5 => Scalar::Float64(raw as f64 / 47.0),
6 => Scalar::Float64(0.0),
7 => Scalar::Float64(-0.0),
_ => Scalar::Float64(raw.signum() as f64 * f64::INFINITY),
});
}
assert_args(
case,
"numeric",
&numeric,
expected_numeric(&numeric, false),
expected_numeric(&numeric, true),
);
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
if len > 1 {
timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
}
for _ in timedeltas.len()..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
assert_args(
case,
"timedelta",
&timedeltas,
expected_timedelta(&timedeltas, false),
expected_timedelta(&timedeltas, true),
);
}
}
#[test]
fn nansem_matches_std_over_sqrt_n() {
let values = vec![
Scalar::Float64(2.0),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(4.0),
Scalar::Float64(5.0),
Scalar::Float64(5.0),
Scalar::Float64(7.0),
Scalar::Float64(9.0),
];
let sem = super::nansem(&values, 1);
assert!(matches!(sem, Scalar::Float64(_)));
let Scalar::Float64(v) = sem else {
return;
};
assert!((v - 0.7559289460184544).abs() < 1e-9);
}
#[test]
fn nansem_empty_returns_null() {
assert!(super::nansem(&[], 1).is_missing());
assert!(super::nansem(&[Scalar::Float64(1.0)], 1).is_missing());
}
#[test]
fn nanptp_returns_max_minus_min() {
let values = vec![
Scalar::Float64(3.0),
Scalar::Null(NullKind::NaN),
Scalar::Float64(7.0),
Scalar::Float64(1.0),
];
assert_eq!(super::nanptp(&values), Scalar::Float64(6.0));
}
#[test]
fn nanptp_empty_returns_null() {
assert!(super::nanptp(&[]).is_missing());
assert!(super::nanptp(&[Scalar::Null(NullKind::NaN)]).is_missing());
}
#[test]
fn nanptp_timedelta64_preserves_dtype_u2g0r() {
let one_hour: i64 = 3_600 * 1_000_000_000;
let values = vec![
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(5 * one_hour),
Scalar::Timedelta64(2 * one_hour),
];
assert_eq!(super::nanptp(&values), Scalar::Timedelta64(4 * one_hour));
}
#[test]
fn nanptp_matches_numeric_and_timedelta_oracle_affjt() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(3202034522624059733)
.wrapping_add(4354685564936845319);
*seed
}
fn expected_numeric(values: &[Scalar]) -> Scalar {
let mut lo = f64::INFINITY;
let mut hi = f64::NEG_INFINITY;
let mut seen = false;
for value in values {
if value.is_missing() {
continue;
}
if let Ok(value) = value.to_f64() {
seen = true;
lo = lo.min(value);
hi = hi.max(value);
}
}
if seen {
Scalar::Float64(hi - lo)
} else {
Scalar::Null(NullKind::NaN)
}
}
fn expected_timedelta(values: &[Scalar]) -> Scalar {
let mut lo = i64::MAX;
let mut hi = i64::MIN;
let mut seen = false;
for value in values {
if let Scalar::Timedelta64(ns) = value
&& !value.is_missing()
{
seen = true;
lo = lo.min(*ns);
hi = hi.max(*ns);
}
}
if seen {
Scalar::Timedelta64(hi - lo)
} else {
Scalar::Null(NullKind::NaN)
}
}
fn assert_ptp(case: usize, family: &str, values: &[Scalar], expected: Scalar) {
let actual = super::nanptp(values);
assert!(
actual.semantic_eq(&expected),
"case={case} family={family}: expected {expected:?}, got {actual:?} for {values:?}"
);
}
assert_ptp(
usize::MAX,
"numeric_all_missing",
&[Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)],
Scalar::Null(NullKind::NaN),
);
assert_ptp(
usize::MAX - 1,
"timedelta_all_missing",
&[Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)],
Scalar::Null(NullKind::NaN),
);
let mut seed = 0xa22f_17ed_57a7_15e5_u64;
for case in 0..260 {
let len = (next(&mut seed) % 83 + 1) as usize;
let mut numeric = Vec::with_capacity(len);
numeric.push(Scalar::Int64(case as i64 - 130));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
numeric.push(match next(&mut seed) % 9 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw),
5 => Scalar::Float64(raw as f64 / 53.0),
6 => Scalar::Float64(0.0),
7 => Scalar::Float64(-0.0),
_ => Scalar::Float64(raw.signum() as f64 * f64::INFINITY),
});
}
assert_ptp(case, "numeric", &numeric, expected_numeric(&numeric));
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 130));
for _ in 1..len {
let raw = (next(&mut seed) % 10_001) as i64 - 5_000;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
assert_ptp(
case,
"timedelta",
&timedeltas,
expected_timedelta(&timedeltas),
);
}
}
#[test]
fn nanargmax_nanargmin_timedelta64_compare_by_ns_ql1t5() {
let one_hour: i64 = 3_600 * 1_000_000_000;
let values = vec![
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(5 * one_hour),
Scalar::Timedelta64(one_hour),
Scalar::Timedelta64(3 * one_hour),
];
assert_eq!(super::nanargmax(&values), Some(1));
assert_eq!(super::nanargmin(&values), Some(2));
}
#[test]
fn nanprod_timedelta64_returns_null_szq6a() {
let one_hour: i64 = 3_600 * 1_000_000_000;
let values = vec![
Scalar::Timedelta64(2 * one_hour),
Scalar::Timedelta64(3 * one_hour),
];
assert!(super::nanprod(&values).is_missing());
}
#[test]
fn nanprod_matches_numeric_and_timedelta_oracle_9938h() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
*seed
}
fn expected_numeric(values: &[Scalar]) -> Scalar {
let mut product = 1.0_f64;
for value in values {
if value.is_missing() {
continue;
}
if let Ok(value) = value.to_f64() {
product *= value;
}
}
Scalar::Float64(product)
}
fn expected_timedelta(values: &[Scalar]) -> Scalar {
if values
.iter()
.any(|value| matches!(value, Scalar::Timedelta64(_)) && !value.is_missing())
{
Scalar::Null(NullKind::NaN)
} else {
Scalar::Float64(1.0)
}
}
fn assert_prod(case: usize, family: &str, values: &[Scalar], expected: Scalar) {
let actual = super::nanprod(values);
assert!(
actual.semantic_eq(&expected),
"case={case} family={family}: expected {expected:?}, got {actual:?} for {values:?}"
);
}
let all_missing = [Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)];
assert_prod(
usize::MAX,
"numeric_all_missing",
&all_missing,
Scalar::Float64(1.0),
);
let td_all_missing = [Scalar::Timedelta64(i64::MIN), Scalar::Null(NullKind::NaN)];
assert_prod(
usize::MAX - 1,
"timedelta_all_missing",
&td_all_missing,
expected_timedelta(&td_all_missing),
);
let mut seed = 0x6e0d_9938_a11c_0de5_u64;
for case in 0..280 {
let len = (next(&mut seed) % 89 + 1) as usize;
let mut numeric = Vec::with_capacity(len);
numeric.push(Scalar::Int64((case % 17) as i64 - 8));
for _ in 1..len {
let raw = (next(&mut seed) % 2_001) as i64 - 1_000;
numeric.push(match next(&mut seed) % 9 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 19),
5 => Scalar::Float64(raw as f64 / 97.0),
6 => Scalar::Float64(0.0),
7 => Scalar::Float64(-0.0),
_ => Scalar::Float64(1.0),
});
}
assert_prod(case, "numeric", &numeric, expected_numeric(&numeric));
let mut timedeltas = Vec::with_capacity(len);
timedeltas.push(Scalar::Timedelta64(case as i64 - 140));
for _ in 1..len {
let raw = (next(&mut seed) % 10_001) as i64 - 5_000;
timedeltas.push(match next(&mut seed) % 7 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Timedelta64(i64::MIN),
_ => Scalar::Timedelta64(raw),
});
}
assert_prod(
case,
"timedelta",
&timedeltas,
expected_timedelta(&timedeltas),
);
}
}
#[test]
fn nanskew_symmetric_distribution_near_zero() {
let values = vec![
Scalar::Float64(1.0),
Scalar::Float64(2.0),
Scalar::Float64(3.0),
Scalar::Float64(4.0),
Scalar::Float64(5.0),
];
let skew = super::nanskew(&values);
assert!(matches!(skew, Scalar::Float64(_)));
let Scalar::Float64(v) = skew else {
return;
};
assert!(v.abs() < 1e-9);
}
#[test]
fn nanskew_too_few_values_returns_null() {
assert!(super::nanskew(&[]).is_missing());
assert!(super::nanskew(&[Scalar::Float64(1.0), Scalar::Float64(2.0)]).is_missing());
}
#[test]
fn nankurt_symmetric_uniform_distribution() {
let values = vec![
Scalar::Float64(1.0),
Scalar::Float64(2.0),
Scalar::Float64(3.0),
Scalar::Float64(4.0),
Scalar::Float64(5.0),
];
let kurt = super::nankurt(&values);
assert!(matches!(kurt, Scalar::Float64(_)));
let Scalar::Float64(v) = kurt else {
return;
};
assert!((v + 1.2).abs() < 1e-9);
}
#[test]
fn nankurt_too_few_values_returns_null() {
let vals: Vec<Scalar> = (0..3).map(|i| Scalar::Float64(i as f64)).collect();
assert!(super::nankurt(&vals).is_missing());
}
#[test]
fn nanskew_constant_series_returns_zero() {
let values = vec![
Scalar::Float64(5.0),
Scalar::Float64(5.0),
Scalar::Float64(5.0),
];
assert_eq!(super::nanskew(&values), Scalar::Float64(0.0));
assert_eq!(
super::nankurt(&[
Scalar::Float64(5.0),
Scalar::Float64(5.0),
Scalar::Float64(5.0),
Scalar::Float64(5.0),
]),
Scalar::Float64(0.0)
);
}
#[test]
fn nanskew_nankurt_match_numeric_oracle_jr7zk() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(2862933555777941757)
.wrapping_add(3037000493);
*seed
}
fn samples(values: &[Scalar]) -> Vec<f64> {
values
.iter()
.filter(|value| !value.is_missing())
.filter_map(|value| value.to_f64().ok())
.collect()
}
fn expected_skew(values: &[Scalar]) -> Scalar {
let samples = samples(values);
let n = samples.len() as f64;
if n < 3.0 {
return Scalar::Null(NullKind::NaN);
}
let mean = samples.iter().sum::<f64>() / n;
let m2 = samples
.iter()
.map(|value| (value - mean).powi(2))
.sum::<f64>();
let m3 = samples
.iter()
.map(|value| (value - mean).powi(3))
.sum::<f64>();
let s2 = m2 / (n - 1.0);
if s2 == 0.0 {
return Scalar::Float64(0.0);
}
Scalar::Float64((n / ((n - 1.0) * (n - 2.0))) * (m3 / s2.powf(1.5)))
}
fn expected_kurt(values: &[Scalar]) -> Scalar {
let samples = samples(values);
let n = samples.len() as f64;
if n < 4.0 {
return Scalar::Null(NullKind::NaN);
}
let mean = samples.iter().sum::<f64>() / n;
let m2 = samples
.iter()
.map(|value| (value - mean).powi(2))
.sum::<f64>();
let m4 = samples
.iter()
.map(|value| (value - mean).powi(4))
.sum::<f64>();
let s2 = m2 / (n - 1.0);
if s2 == 0.0 {
return Scalar::Float64(0.0);
}
let adj = (n * (n + 1.0)) / ((n - 1.0) * (n - 2.0) * (n - 3.0));
let sub = (3.0 * (n - 1.0).powi(2)) / ((n - 2.0) * (n - 3.0));
Scalar::Float64(adj * (m4 / (s2 * s2)) - sub)
}
fn assert_moments(case: usize, values: &[Scalar]) {
let expected_skew = expected_skew(values);
let expected_kurt = expected_kurt(values);
let actual_skew = super::nanskew(values);
let actual_kurt = super::nankurt(values);
assert!(
actual_skew.semantic_eq(&expected_skew),
"case={case}: expected skew {expected_skew:?}, got {actual_skew:?} for {values:?}"
);
assert!(
actual_kurt.semantic_eq(&expected_kurt),
"case={case}: expected kurt {expected_kurt:?}, got {actual_kurt:?} for {values:?}"
);
}
assert_moments(
usize::MAX,
&[Scalar::Null(NullKind::Null), Scalar::Float64(f64::NAN)],
);
assert_moments(
usize::MAX - 1,
&[
Scalar::Float64(7.0),
Scalar::Float64(7.0),
Scalar::Float64(7.0),
Scalar::Float64(7.0),
],
);
let mut seed = 0x5ce7_9a55_a11c_0de5_u64;
for case in 0..260 {
let len = (next(&mut seed) % 89 + 1) as usize;
let mut values = Vec::with_capacity(len);
if case % 11 == 0 {
values.extend((0..len).map(|_| Scalar::Float64(3.0)));
} else {
values.push(Scalar::Float64(case as f64 / 19.0));
for _ in 1..len {
let raw = (next(&mut seed) % 20_001) as i64 - 10_000;
values.push(match next(&mut seed) % 8 {
0 => Scalar::Null(NullKind::Null),
1 => Scalar::Null(NullKind::NaN),
2 => Scalar::Float64(f64::NAN),
3 => Scalar::Bool(raw & 1 == 0),
4 => Scalar::Int64(raw % 251),
5 => Scalar::Float64(raw as f64 / 83.0),
6 => Scalar::Float64(0.0),
_ => Scalar::Float64(-0.0),
});
}
}
assert_moments(case, &values);
}
}
#[test]
fn interval_default_closed_is_right() {
assert_eq!(IntervalClosed::default(), IntervalClosed::Right);
}
#[test]
fn interval_left_and_right_closed_helpers() {
assert!(IntervalClosed::Left.left_closed());
assert!(!IntervalClosed::Left.right_closed());
assert!(!IntervalClosed::Right.left_closed());
assert!(IntervalClosed::Right.right_closed());
assert!(IntervalClosed::Both.left_closed());
assert!(IntervalClosed::Both.right_closed());
assert!(!IntervalClosed::Neither.left_closed());
assert!(!IntervalClosed::Neither.right_closed());
}
#[test]
fn interval_display_matches_pandas_notation() {
assert_eq!(
Interval::new(0.0, 5.0, IntervalClosed::Right).to_string(),
"(0.0, 5.0]"
);
assert_eq!(
Interval::new(0.0, 5.0, IntervalClosed::Left).to_string(),
"[0.0, 5.0)"
);
assert_eq!(
Interval::new(0.0, 5.0, IntervalClosed::Both).to_string(),
"[0.0, 5.0]"
);
assert_eq!(
Interval::new(0.0, 5.0, IntervalClosed::Neither).to_string(),
"(0.0, 5.0)"
);
assert_eq!(
Interval::new(2.5, 3.5, IntervalClosed::Right).to_string(),
"(2.5, 3.5]"
);
assert_eq!(
Interval::new(-1.0, 0.0, IntervalClosed::Right).to_string(),
"(-1.0, 0.0]"
);
assert_eq!(
Interval::new(1e20, 2e20, IntervalClosed::Right).to_string(),
"(1e+20, 2e+20]"
);
}
#[test]
fn interval_length_and_mid() {
let i = Interval::new(2.0, 10.0, IntervalClosed::Right);
assert_eq!(i.length(), 8.0);
assert_eq!(i.mid(), 6.0);
}
#[test]
fn interval_contains_matches_closed_policy() {
let right = Interval::new(0.0, 5.0, IntervalClosed::Right);
assert!(!right.contains(0.0));
assert!(right.contains(2.5));
assert!(right.contains(5.0));
let left = Interval::new(0.0, 5.0, IntervalClosed::Left);
assert!(left.contains(0.0));
assert!(left.contains(2.5));
assert!(!left.contains(5.0));
let both = Interval::new(0.0, 5.0, IntervalClosed::Both);
assert!(both.contains(0.0));
assert!(both.contains(5.0));
let neither = Interval::new(0.0, 5.0, IntervalClosed::Neither);
assert!(!neither.contains(0.0));
assert!(!neither.contains(5.0));
assert!(neither.contains(2.5));
}
#[test]
fn interval_contains_nan_returns_false() {
let i = Interval::new(0.0, 10.0, IntervalClosed::Both);
assert!(!i.contains(f64::NAN));
}
#[test]
fn interval_is_empty_matches_pandas() {
assert!(Interval::new(3.0, 3.0, IntervalClosed::Right).is_empty());
assert!(Interval::new(3.0, 3.0, IntervalClosed::Left).is_empty());
assert!(Interval::new(3.0, 3.0, IntervalClosed::Neither).is_empty());
assert!(!Interval::new(3.0, 3.0, IntervalClosed::Both).is_empty());
assert!(!Interval::new(0.0, 5.0, IntervalClosed::Right).is_empty());
}
#[test]
fn interval_overlaps_disjoint_returns_false() {
let a = Interval::new(0.0, 1.0, IntervalClosed::Right);
let b = Interval::new(2.0, 3.0, IntervalClosed::Right);
assert!(!a.overlaps(&b));
assert!(!b.overlaps(&a));
}
#[test]
fn interval_overlaps_nested_returns_true() {
let outer = Interval::new(0.0, 10.0, IntervalClosed::Right);
let inner = Interval::new(3.0, 7.0, IntervalClosed::Right);
assert!(outer.overlaps(&inner));
assert!(inner.overlaps(&outer));
}
#[test]
fn interval_overlaps_touching_respects_closed_policy() {
let right_right = (
Interval::new(0.0, 1.0, IntervalClosed::Right),
Interval::new(1.0, 2.0, IntervalClosed::Right),
);
assert!(!right_right.0.overlaps(&right_right.1));
let both_both = (
Interval::new(0.0, 1.0, IntervalClosed::Both),
Interval::new(1.0, 2.0, IntervalClosed::Both),
);
assert!(both_both.0.overlaps(&both_both.1));
}
#[test]
fn interval_roundtrips_through_serde_json() {
let i = Interval::new(1.5, 3.25, IntervalClosed::Both);
let json = serde_json::to_string(&i).expect("serialize");
let back: Interval = serde_json::from_str(&json).expect("deserialize");
assert_eq!(i, back);
}
#[test]
fn interval_serde_default_closed_is_right_when_missing() {
let back: Interval =
serde_json::from_str(r#"{"left":0.0,"right":5.0}"#).expect("deserialize");
assert_eq!(back.closed, IntervalClosed::Right);
}
#[test]
fn period_freq_parses_canonical_aliases() {
assert_eq!(PeriodFreq::parse("A"), Some(PeriodFreq::Annual));
assert_eq!(PeriodFreq::parse("Y"), Some(PeriodFreq::Annual));
assert_eq!(PeriodFreq::parse("Q"), Some(PeriodFreq::Quarterly));
assert_eq!(PeriodFreq::parse("M"), Some(PeriodFreq::Monthly));
assert_eq!(PeriodFreq::parse("W"), Some(PeriodFreq::Weekly));
assert_eq!(PeriodFreq::parse("D"), Some(PeriodFreq::Daily));
assert_eq!(PeriodFreq::parse("B"), Some(PeriodFreq::Business));
assert_eq!(PeriodFreq::parse("H"), Some(PeriodFreq::Hourly));
assert_eq!(PeriodFreq::parse("T"), Some(PeriodFreq::Minutely));
assert_eq!(PeriodFreq::parse("min"), Some(PeriodFreq::Minutely));
assert_eq!(PeriodFreq::parse("S"), Some(PeriodFreq::Secondly));
}
#[test]
fn period_freq_parse_is_case_insensitive() {
assert_eq!(PeriodFreq::parse("quarterly"), Some(PeriodFreq::Quarterly));
assert_eq!(PeriodFreq::parse("MONTHLY"), Some(PeriodFreq::Monthly));
}
#[test]
fn period_freq_rejects_unknown_aliases() {
assert_eq!(PeriodFreq::parse("nanosec"), None);
assert_eq!(PeriodFreq::parse(""), None);
assert_eq!(PeriodFreq::parse("xyz"), None);
}
#[test]
fn period_freq_alias_roundtrip() {
for freq in [
PeriodFreq::Annual,
PeriodFreq::Quarterly,
PeriodFreq::Monthly,
PeriodFreq::Weekly,
PeriodFreq::Daily,
PeriodFreq::Business,
PeriodFreq::Hourly,
PeriodFreq::Minutely,
PeriodFreq::Secondly,
] {
assert_eq!(PeriodFreq::parse(freq.alias()), Some(freq));
}
}
#[test]
fn period_freq_anchored_aliases_are_pandas_canonical_h2wiv() {
assert_eq!(PeriodFreq::Annual.alias(), "Y-DEC");
assert_eq!(PeriodFreq::Quarterly.alias(), "Q-DEC");
assert_eq!(PeriodFreq::Weekly.alias(), "W-SUN");
assert_eq!(PeriodFreq::parse("A"), Some(PeriodFreq::Annual));
assert_eq!(PeriodFreq::parse("Y"), Some(PeriodFreq::Annual));
assert_eq!(PeriodFreq::parse("Y-DEC"), Some(PeriodFreq::Annual));
assert_eq!(PeriodFreq::parse("Q"), Some(PeriodFreq::Quarterly));
assert_eq!(PeriodFreq::parse("Q-DEC"), Some(PeriodFreq::Quarterly));
assert_eq!(PeriodFreq::parse("W"), Some(PeriodFreq::Weekly));
assert_eq!(PeriodFreq::parse("W-SUN"), Some(PeriodFreq::Weekly));
}
#[test]
fn period_freq_intraday_aliases_are_pandas_canonical_8kfdo() {
assert_eq!(PeriodFreq::Hourly.alias(), "h");
assert_eq!(PeriodFreq::Minutely.alias(), "min");
assert_eq!(PeriodFreq::Secondly.alias(), "s");
assert_eq!(PeriodFreq::parse("H"), Some(PeriodFreq::Hourly));
assert_eq!(PeriodFreq::parse("T"), Some(PeriodFreq::Minutely));
assert_eq!(PeriodFreq::parse("S"), Some(PeriodFreq::Secondly));
}
#[test]
fn period_scalar_accessors_match_pandas_star8() {
let period = Period::new(600, PeriodFreq::Monthly);
assert_eq!(period.ordinal(), 600);
assert_eq!(period.freq(), PeriodFreq::Monthly);
assert_eq!(period.freqstr(), "M");
}
#[test]
fn period_parse_common_pandas_ordinals_avm08() {
assert_eq!(
Period::parse("2024").unwrap(),
Period::new(54, PeriodFreq::Annual)
);
assert_eq!(
Period::parse("2024Q1").unwrap(),
Period::new(216, PeriodFreq::Quarterly)
);
assert_eq!(
Period::parse("2024-01").unwrap(),
Period::new(648, PeriodFreq::Monthly)
);
assert_eq!(
Period::parse("2024-01-15").unwrap(),
Period::new(19_737, PeriodFreq::Daily)
);
assert!(Period::parse("216").is_err());
}
#[test]
fn period_shift_advances_ordinal() {
let q1 = Period::new(216, PeriodFreq::Quarterly);
let q2 = q1.shift(1);
assert_eq!(q2.ordinal, 217);
assert_eq!(q2.freq, PeriodFreq::Quarterly);
let q0 = q1.shift(-1);
assert_eq!(q0.ordinal, 215);
}
#[test]
fn period_shift_saturates_on_overflow() {
let p = Period::new(i64::MAX - 2, PeriodFreq::Daily);
assert_eq!(p.shift(100).ordinal, i64::MAX);
let p = Period::new(i64::MIN + 2, PeriodFreq::Daily);
assert_eq!(p.shift(-100).ordinal, i64::MIN);
}
#[test]
fn period_diff_returns_period_count() {
let a = Period::new(216, PeriodFreq::Quarterly);
let b = Period::new(220, PeriodFreq::Quarterly);
assert_eq!(b.diff(&a), Some(4));
assert_eq!(a.diff(&b), Some(-4));
}
#[test]
fn period_diff_rejects_mismatched_freq() {
let monthly = Period::new(100, PeriodFreq::Monthly);
let quarterly = Period::new(100, PeriodFreq::Quarterly);
assert_eq!(monthly.diff(&quarterly), None);
assert_eq!(quarterly.diff(&monthly), None);
}
#[test]
fn period_cmp_same_freq_respects_ordinal_order() {
use std::cmp::Ordering;
let a = Period::new(10, PeriodFreq::Monthly);
let b = Period::new(20, PeriodFreq::Monthly);
assert_eq!(a.cmp_same_freq(&b), Some(Ordering::Less));
assert_eq!(b.cmp_same_freq(&a), Some(Ordering::Greater));
assert_eq!(a.cmp_same_freq(&a), Some(Ordering::Equal));
}
#[test]
fn period_cmp_cross_freq_returns_none() {
let m = Period::new(1, PeriodFreq::Monthly);
let q = Period::new(1, PeriodFreq::Quarterly);
assert_eq!(m.cmp_same_freq(&q), None);
}
#[test]
fn period_arithmetic_matches_seeded_oracles_bac28() {
use std::cmp::Ordering;
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(2862933555777941757)
.wrapping_add(3037000493);
*seed
}
fn freq_for(raw: u64) -> PeriodFreq {
match raw % 9 {
0 => PeriodFreq::Annual,
1 => PeriodFreq::Quarterly,
2 => PeriodFreq::Monthly,
3 => PeriodFreq::Weekly,
4 => PeriodFreq::Daily,
5 => PeriodFreq::Business,
6 => PeriodFreq::Hourly,
7 => PeriodFreq::Minutely,
_ => PeriodFreq::Secondly,
}
}
fn different_freq(freq: PeriodFreq) -> PeriodFreq {
match freq {
PeriodFreq::Annual => PeriodFreq::Quarterly,
PeriodFreq::Quarterly => PeriodFreq::Monthly,
PeriodFreq::Monthly => PeriodFreq::Weekly,
PeriodFreq::Weekly => PeriodFreq::Daily,
PeriodFreq::Daily => PeriodFreq::Business,
PeriodFreq::Business => PeriodFreq::Hourly,
PeriodFreq::Hourly => PeriodFreq::Minutely,
PeriodFreq::Minutely => PeriodFreq::Secondly,
PeriodFreq::Secondly => PeriodFreq::Annual,
}
}
fn assert_period_case(
case: usize,
freq: PeriodFreq,
ordinal: i64,
shift_by: i64,
other_ordinal: i64,
) {
let period = Period::new(ordinal, freq);
let shifted = period.shift(shift_by);
assert_eq!(
shifted.ordinal,
ordinal.saturating_add(shift_by),
"case {case}: shift ordinal"
);
assert_eq!(shifted.freq, freq, "case {case}: shift freq");
let same_freq_other = Period::new(other_ordinal, freq);
assert_eq!(
period.diff(&same_freq_other),
Some(ordinal.saturating_sub(other_ordinal)),
"case {case}: same-freq diff"
);
assert_eq!(
period.cmp_same_freq(&same_freq_other),
Some(ordinal.cmp(&other_ordinal)),
"case {case}: same-freq cmp"
);
let cross_freq_other = Period::new(other_ordinal, different_freq(freq));
assert_eq!(
period.diff(&cross_freq_other),
None,
"case {case}: cross-freq diff"
);
assert_eq!(
period.cmp_same_freq(&cross_freq_other),
None,
"case {case}: cross-freq cmp"
);
}
assert_period_case(usize::MAX, PeriodFreq::Daily, i64::MAX - 2, 10, i64::MIN);
assert_period_case(
usize::MAX - 1,
PeriodFreq::Daily,
i64::MIN + 2,
-10,
i64::MAX,
);
assert_eq!(
Period::new(10, PeriodFreq::Monthly)
.cmp_same_freq(&Period::new(10, PeriodFreq::Monthly)),
Some(Ordering::Equal)
);
let mut seed = 0xbac2_8d1f_0d1c_5eed_u64;
for case in 0..260 {
let freq = freq_for(next(&mut seed));
let ordinal = match case % 53 {
0 => i64::MAX - (next(&mut seed) % 8) as i64,
1 => i64::MIN + (next(&mut seed) % 8) as i64,
_ => (next(&mut seed) % 200_001) as i64 - 100_000,
};
let shift_by = match case % 47 {
0 => 512,
1 => -512,
_ => (next(&mut seed) % 4097) as i64 - 2048,
};
let other_ordinal = match case % 41 {
0 => i64::MAX,
1 => i64::MIN,
_ => (next(&mut seed) % 200_001) as i64 - 100_000,
};
assert_period_case(case, freq, ordinal, shift_by, other_ordinal);
}
}
#[test]
fn period_display_is_pandas_calendar_string() {
assert_eq!(
Period::new(216, PeriodFreq::Quarterly).to_string(),
"2024Q1"
);
assert_eq!(Period::new(54, PeriodFreq::Annual).to_string(), "2024");
assert_eq!(Period::new(650, PeriodFreq::Monthly).to_string(), "2024-03");
assert_eq!(
Period::new(fp_days("2024-01-15"), PeriodFreq::Daily).to_string(),
"2024-01-15"
);
assert_eq!(
Scalar::Period(Period::new(i64::MIN, PeriodFreq::Daily)).to_string(),
"NaT"
);
}
#[cfg(test)]
fn fp_days(ymd: &str) -> i64 {
Period::parse(ymd).expect("daily period").ordinal
}
#[test]
fn period_roundtrips_through_serde_json() {
let p = Period::new(42, PeriodFreq::Weekly);
let json = serde_json::to_string(&p).expect("serialize");
let back: Period = serde_json::from_str(&json).expect("deserialize");
assert_eq!(p, back);
}
use super::period_range;
#[test]
fn period_range_zero_periods_is_empty() {
let start = Period::new(216, PeriodFreq::Quarterly);
assert!(period_range(start, 0).is_empty());
}
#[test]
fn period_range_single_period_returns_start_only() {
let start = Period::new(216, PeriodFreq::Quarterly);
let r = period_range(start, 1);
assert_eq!(r.len(), 1);
assert_eq!(r[0], start);
}
#[test]
fn period_range_increments_ordinal_by_one_per_step() {
let start = Period::new(216, PeriodFreq::Quarterly);
let r = period_range(start, 4);
assert_eq!(r.len(), 4);
assert_eq!(r[0].ordinal, 216);
assert_eq!(r[1].ordinal, 217);
assert_eq!(r[2].ordinal, 218);
assert_eq!(r[3].ordinal, 219);
}
#[test]
fn period_range_preserves_frequency() {
let start = Period::new(0, PeriodFreq::Monthly);
let r = period_range(start, 12);
assert!(r.iter().all(|p| p.freq == PeriodFreq::Monthly));
}
#[test]
fn period_range_negative_starting_ordinal_works() {
let start = Period::new(-3, PeriodFreq::Annual);
let r = period_range(start, 5);
assert_eq!(
r.iter().map(|p| p.ordinal).collect::<Vec<_>>(),
vec![-3, -2, -1, 0, 1]
);
}
#[test]
fn period_range_large_n_does_not_panic() {
let start = Period::new(0, PeriodFreq::Monthly);
let r = period_range(start, 1024);
assert_eq!(r.len(), 1024);
assert_eq!(r[1023].ordinal, 1023);
}
#[test]
fn period_range_matches_seeded_ordinal_oracle_z3zh2() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
*seed
}
fn freq_for(raw: u64) -> PeriodFreq {
match raw % 9 {
0 => PeriodFreq::Annual,
1 => PeriodFreq::Quarterly,
2 => PeriodFreq::Monthly,
3 => PeriodFreq::Weekly,
4 => PeriodFreq::Daily,
5 => PeriodFreq::Business,
6 => PeriodFreq::Hourly,
7 => PeriodFreq::Minutely,
_ => PeriodFreq::Secondly,
}
}
fn assert_oracle_case(case: usize, start: Period, periods: usize) {
let got = period_range(start, periods);
assert_eq!(got.len(), periods, "case {case}: length");
for (position, period) in got.iter().enumerate() {
let expected_ordinal = start.ordinal.saturating_add(position as i64);
assert_eq!(
period.ordinal, expected_ordinal,
"case {case}: ordinal at {position}"
);
assert_eq!(period.freq, start.freq, "case {case}: freq at {position}");
}
}
assert_oracle_case(usize::MAX, Period::new(42, PeriodFreq::Monthly), 0);
assert_oracle_case(
usize::MAX - 1,
Period::new(i64::MAX - 3, PeriodFreq::Daily),
8,
);
let mut seed = 0x9e21_0d1c_5eed_0421_u64;
for case in 0..260 {
let freq = freq_for(next(&mut seed));
let periods = (next(&mut seed) % 80) as usize;
let start_ordinal = if case % 37 == 0 {
i64::MAX - 7
} else {
(next(&mut seed) % 20_001) as i64 - 10_000
};
assert_oracle_case(case, Period::new(start_ordinal, freq), periods);
}
}
use super::{TypeError, interval_range_by_periods, interval_range_by_step};
#[test]
fn interval_range_by_periods_matches_pandas_default_case() {
let bins = interval_range_by_periods(0.0, 10.0, 5, IntervalClosed::Right);
assert_eq!(bins.len(), 5);
for (i, bin) in bins.iter().enumerate() {
assert_eq!(bin.left, (i as f64) * 2.0);
assert_eq!(bin.right, ((i + 1) as f64) * 2.0);
assert_eq!(bin.closed, IntervalClosed::Right);
}
}
#[test]
fn interval_range_by_periods_final_edge_is_exact_end() {
let bins = interval_range_by_periods(0.0, 1.0, 7, IntervalClosed::Right);
assert_eq!(bins.last().unwrap().right, 1.0);
}
#[test]
fn interval_range_by_periods_zero_periods_is_empty() {
assert!(interval_range_by_periods(0.0, 10.0, 0, IntervalClosed::Right).is_empty());
}
#[test]
fn interval_range_by_periods_reversed_range_is_empty() {
assert!(interval_range_by_periods(10.0, 0.0, 5, IntervalClosed::Right).is_empty());
}
#[test]
fn interval_range_by_periods_preserves_closed_policy() {
for closed in [
IntervalClosed::Left,
IntervalClosed::Right,
IntervalClosed::Both,
IntervalClosed::Neither,
] {
let bins = interval_range_by_periods(0.0, 4.0, 2, closed);
assert!(bins.iter().all(|b| b.closed == closed));
}
}
#[test]
fn interval_range_by_step_matches_pandas_default_case() {
let bins = interval_range_by_step(0.0, 10.0, 2.0, IntervalClosed::Right).expect("ok");
assert_eq!(bins.len(), 5);
assert_eq!(bins[0].left, 0.0);
assert_eq!(bins[4].right, 10.0);
}
#[test]
fn interval_range_by_step_rejects_non_positive_step() {
assert!(matches!(
interval_range_by_step(0.0, 10.0, 0.0, IntervalClosed::Right),
Err(TypeError::InvalidIntervalStep { .. })
));
assert!(matches!(
interval_range_by_step(0.0, 10.0, -2.0, IntervalClosed::Right),
Err(TypeError::InvalidIntervalStep { .. })
));
assert!(matches!(
interval_range_by_step(0.0, 10.0, f64::NAN, IntervalClosed::Right),
Err(TypeError::InvalidIntervalStep { .. })
));
assert!(matches!(
interval_range_by_step(0.0, 10.0, f64::INFINITY, IntervalClosed::Right),
Err(TypeError::InvalidIntervalStep { .. })
));
}
#[test]
fn interval_range_by_step_rejects_non_dividing_step() {
assert!(matches!(
interval_range_by_step(0.0, 10.0, 3.0, IntervalClosed::Right),
Err(TypeError::IntervalStepDoesNotDivide { .. })
));
}
#[test]
fn interval_range_by_step_reversed_range_is_empty() {
let bins = interval_range_by_step(10.0, 0.0, 2.0, IntervalClosed::Right).expect("ok");
assert!(bins.is_empty());
}
#[test]
fn interval_range_by_step_degenerate_zero_span_is_empty() {
let bins = interval_range_by_step(5.0, 5.0, 1.0, IntervalClosed::Right).expect("ok");
assert!(bins.is_empty());
}
#[test]
fn interval_range_by_step_accepts_float_step_within_tolerance() {
let bins = interval_range_by_step(0.0, 1.0, 0.1, IntervalClosed::Right).expect("ok");
assert_eq!(bins.len(), 10);
assert_eq!(bins.last().unwrap().right, 1.0);
}
#[test]
fn interval_range_matches_seeded_arithmetic_oracle_t9ozf() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(2862933555777941757)
.wrapping_add(3037000493);
*seed
}
fn closed_for(raw: u64) -> IntervalClosed {
match raw % 4 {
0 => IntervalClosed::Left,
1 => IntervalClosed::Right,
2 => IntervalClosed::Both,
_ => IntervalClosed::Neither,
}
}
fn assert_interval(
case: usize,
kind: &str,
pos: usize,
actual: &Interval,
expected: &Interval,
) {
assert!(
(actual.left - expected.left).abs() < 1e-12,
"case={case} kind={kind} pos={pos}: expected left {}, got {}",
expected.left,
actual.left
);
assert!(
(actual.right - expected.right).abs() < 1e-12,
"case={case} kind={kind} pos={pos}: expected right {}, got {}",
expected.right,
actual.right
);
assert_eq!(
actual.closed, expected.closed,
"case={case} kind={kind} pos={pos}: closed mismatch"
);
}
fn expected_by_periods(
start: f64,
end: f64,
periods: usize,
closed: IntervalClosed,
) -> Vec<Interval> {
if periods == 0 || !start.is_finite() || !end.is_finite() || start >= end {
return Vec::new();
}
let step = (end - start) / periods as f64;
(0..periods)
.map(|pos| {
let left = start + step * pos as f64;
let right = if pos + 1 == periods {
end
} else {
start + step * (pos + 1) as f64
};
Interval::new(left, right, closed)
})
.collect()
}
fn expected_by_step(
start: f64,
end: f64,
step: f64,
closed: IntervalClosed,
) -> Vec<Interval> {
if start >= end {
return Vec::new();
}
let count = ((end - start) / step).round() as usize;
(0..count)
.map(|pos| {
let left = start + step * pos as f64;
let right = if pos + 1 == count {
end
} else {
start + step * (pos + 1) as f64
};
Interval::new(left, right, closed)
})
.collect()
}
assert!(interval_range_by_periods(5.0, 5.0, 4, IntervalClosed::Right).is_empty());
assert!(interval_range_by_periods(5.0, 4.0, 4, IntervalClosed::Right).is_empty());
assert!(
interval_range_by_step(5.0, 5.0, 1.0, IntervalClosed::Right)
.expect("zero span")
.is_empty()
);
let mut seed = 0x171e_7a11_c0de_5eed_u64;
for case in 0..220 {
let start = (next(&mut seed) % 2_001) as f64 / 10.0 - 100.0;
let periods = (next(&mut seed) % 24 + 1) as usize;
let width = (next(&mut seed) % 1_000 + 1) as f64 / 4.0;
let end = start + width;
let closed = closed_for(next(&mut seed));
let actual = interval_range_by_periods(start, end, periods, closed);
let expected = expected_by_periods(start, end, periods, closed);
assert_eq!(
actual.len(),
expected.len(),
"case={case} periods: length mismatch"
);
for (pos, (actual, expected)) in actual.iter().zip(expected.iter()).enumerate() {
assert_interval(case, "periods", pos, actual, expected);
}
let step_count = (next(&mut seed) % 20 + 1) as usize;
let step = (next(&mut seed) % 25 + 1) as f64;
let step_end = start + step * step_count as f64;
let actual = interval_range_by_step(start, step_end, step, closed).expect("divides");
let expected = expected_by_step(start, step_end, step, closed);
assert_eq!(
actual.len(),
expected.len(),
"case={case} step: length mismatch"
);
for (pos, (actual, expected)) in actual.iter().zip(expected.iter()).enumerate() {
assert_interval(case, "step", pos, actual, expected);
}
}
}
use super::Timedelta;
#[test]
fn timedelta_add_sums_non_nat() {
let one_hour = Timedelta::NANOS_PER_HOUR;
let one_day = Timedelta::NANOS_PER_DAY;
assert_eq!(Timedelta::add(one_hour, one_day), one_hour + one_day);
}
#[test]
fn timedelta_add_propagates_nat() {
assert_eq!(Timedelta::add(Timedelta::NAT, 100), Timedelta::NAT);
assert_eq!(Timedelta::add(100, Timedelta::NAT), Timedelta::NAT);
assert_eq!(
Timedelta::add(Timedelta::NAT, Timedelta::NAT),
Timedelta::NAT
);
}
#[test]
fn timedelta_add_saturates_on_overflow() {
assert_eq!(Timedelta::add(i64::MAX - 10, 100), i64::MAX);
assert_eq!(Timedelta::add(i64::MIN + 10, -100), i64::MIN);
}
#[test]
fn timedelta_sub_subtracts_non_nat() {
let one_hour = Timedelta::NANOS_PER_HOUR;
assert_eq!(
Timedelta::sub(one_hour, Timedelta::NANOS_PER_MIN),
one_hour - Timedelta::NANOS_PER_MIN
);
}
#[test]
fn timedelta_sub_propagates_nat() {
assert_eq!(Timedelta::sub(Timedelta::NAT, 100), Timedelta::NAT);
assert_eq!(Timedelta::sub(100, Timedelta::NAT), Timedelta::NAT);
}
#[test]
fn timedelta_neg_flips_sign_non_nat() {
assert_eq!(Timedelta::neg(5), -5);
assert_eq!(Timedelta::neg(-5), 5);
assert_eq!(Timedelta::neg(0), 0);
}
#[test]
fn timedelta_neg_preserves_nat() {
assert_eq!(Timedelta::neg(Timedelta::NAT), Timedelta::NAT);
}
#[test]
fn timedelta_abs_returns_magnitude() {
assert_eq!(Timedelta::abs(-5), 5);
assert_eq!(Timedelta::abs(5), 5);
assert_eq!(Timedelta::abs(0), 0);
assert_eq!(Timedelta::abs(Timedelta::NAT), Timedelta::NAT);
}
#[test]
fn timedelta_mul_scalar_scales() {
let three_hours = Timedelta::NANOS_PER_HOUR * 3;
assert_eq!(
Timedelta::mul_scalar(Timedelta::NANOS_PER_HOUR, 3),
three_hours
);
assert_eq!(Timedelta::mul_scalar(100, 0), 0);
assert_eq!(Timedelta::mul_scalar(100, -2), -200);
}
#[test]
fn timedelta_mul_scalar_saturates() {
assert_eq!(Timedelta::mul_scalar(i64::MAX, 2), i64::MAX);
assert_eq!(Timedelta::mul_scalar(i64::MIN + 1, 2), i64::MIN);
}
#[test]
fn timedelta_mul_scalar_propagates_nat() {
assert_eq!(Timedelta::mul_scalar(Timedelta::NAT, 5), Timedelta::NAT);
}
#[test]
fn timedelta_div_scalar_floor_divides() {
assert_eq!(Timedelta::div_scalar(100, 3), 33);
assert_eq!(Timedelta::div_scalar(-100, 3), -34);
assert_eq!(Timedelta::div_scalar(100, -3), -34);
assert_eq!(Timedelta::div_scalar(-100, -3), 33);
}
#[test]
fn timedelta_div_scalar_zero_divisor_returns_nat() {
assert_eq!(Timedelta::div_scalar(100, 0), Timedelta::NAT);
}
#[test]
fn timedelta_div_scalar_min_neg_one_propagates_nat() {
assert_eq!(Timedelta::div_scalar(i64::MIN, -1), Timedelta::NAT);
assert_eq!(Timedelta::div_scalar(i64::MIN + 1, -1), i64::MAX);
}
#[test]
fn timedelta_div_scalar_propagates_nat() {
assert_eq!(Timedelta::div_scalar(Timedelta::NAT, 10), Timedelta::NAT);
}
#[test]
fn timedelta_div_timedelta_returns_float_ratio() {
let two_hours = Timedelta::NANOS_PER_HOUR * 2;
let one_hour = Timedelta::NANOS_PER_HOUR;
assert!((Timedelta::div_timedelta(two_hours, one_hour) - 2.0).abs() < 1e-12);
assert!((Timedelta::div_timedelta(one_hour, two_hours) - 0.5).abs() < 1e-12);
}
#[test]
fn timedelta_div_timedelta_nat_returns_nan() {
assert!(Timedelta::div_timedelta(Timedelta::NAT, 100).is_nan());
assert!(Timedelta::div_timedelta(100, Timedelta::NAT).is_nan());
}
use super::Timestamp;
#[test]
fn timestamp_from_nanos_is_naive_utc() {
let ts = Timestamp::from_nanos(1_700_000_000_000_000_000);
assert_eq!(ts.nanos, 1_700_000_000_000_000_000);
assert_eq!(ts.tz, None);
assert!(!ts.is_nat());
}
#[test]
fn timestamp_from_nanos_tz_carries_tz_name() {
let ts = Timestamp::from_nanos_tz(1_700_000_000_000_000_000, "US/Eastern");
assert_eq!(ts.tz.as_deref(), Some("US/Eastern"));
}
#[test]
fn timestamp_now_returns_current_time() {
let before = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64;
let ts = Timestamp::now();
let after = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos() as i64;
assert!(ts.nanos >= before);
assert!(ts.nanos <= after);
assert!(!ts.is_nat());
}
#[test]
fn timestamp_today_returns_midnight() {
let ts = Timestamp::today();
assert!(!ts.is_nat());
assert_eq!(ts.hour(), Some(0));
assert_eq!(ts.minute(), Some(0));
assert_eq!(ts.second(), Some(0));
}
#[test]
fn timestamp_add_timedelta_shifts_nanos_and_preserves_tz() {
let ts = Timestamp::from_nanos_tz(0, "US/Eastern");
let one_day = Timedelta::NANOS_PER_DAY;
let shifted = ts.add_timedelta(one_day);
assert_eq!(shifted.nanos, one_day);
assert_eq!(shifted.tz.as_deref(), Some("US/Eastern"));
}
#[test]
fn timestamp_add_timedelta_saturates_on_overflow() {
let ts = Timestamp::from_nanos(i64::MAX - 10);
let shifted = ts.add_timedelta(100);
assert_eq!(shifted.nanos, i64::MAX);
}
#[test]
fn timestamp_add_timedelta_propagates_nat() {
assert!(Timestamp::nat().add_timedelta(100).is_nat());
assert!(
Timestamp::from_nanos(0)
.add_timedelta(Timedelta::NAT)
.is_nat()
);
}
#[test]
fn timestamp_sub_timedelta_shifts_backward() {
let ts = Timestamp::from_nanos(1_000);
let shifted = ts.sub_timedelta(Timedelta::NANOS_PER_MICRO);
assert_eq!(shifted.nanos, 0);
}
#[test]
fn timestamp_sub_timestamp_returns_timedelta_nanos() {
let t0 = Timestamp::from_nanos(0);
let t1 = Timestamp::from_nanos(Timedelta::NANOS_PER_HOUR);
assert_eq!(t1.sub_timestamp(&t0), Timedelta::NANOS_PER_HOUR);
assert_eq!(t0.sub_timestamp(&t1), -Timedelta::NANOS_PER_HOUR);
}
#[test]
fn timestamp_sub_timestamp_nat_propagates() {
let ts = Timestamp::from_nanos(1_000);
assert_eq!(Timestamp::nat().sub_timestamp(&ts), Timedelta::NAT);
assert_eq!(ts.sub_timestamp(&Timestamp::nat()), Timedelta::NAT);
}
#[test]
fn timestamp_semantic_eq_treats_two_nat_as_equal() {
assert!(Timestamp::nat().semantic_eq(&Timestamp::nat()));
assert!(!Timestamp::nat().semantic_eq(&Timestamp::from_nanos(0)));
assert!(!Timestamp::from_nanos(0).semantic_eq(&Timestamp::nat()));
}
#[test]
fn timestamp_partial_cmp_orders_by_nanos_nat_is_incomparable() {
use std::cmp::Ordering;
let a = Timestamp::from_nanos(0);
let b = Timestamp::from_nanos(100);
assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
assert_eq!(b.partial_cmp(&a), Some(Ordering::Greater));
assert_eq!(a.partial_cmp(&a), Some(Ordering::Equal));
assert_eq!(a.partial_cmp(&Timestamp::nat()), None);
assert_eq!(Timestamp::nat().partial_cmp(&Timestamp::nat()), None);
}
#[test]
fn timestamp_display_matches_phase2_debug_format() {
assert_eq!(Timestamp::from_nanos(42).to_string(), "Timestamp[42, UTC]");
assert_eq!(
Timestamp::from_nanos_tz(42, "US/Eastern").to_string(),
"Timestamp[42, US/Eastern]"
);
assert_eq!(Timestamp::nat().to_string(), "NaT");
}
#[test]
fn timestamp_value_and_unit_match_pandas_l0edr() {
let ts = Timestamp::from_nanos(1_000_000_123);
assert_eq!(ts.value(), 1_000_000_123);
assert_eq!(ts.unit(), Some("ns"));
let nat = Timestamp::nat();
assert_eq!(nat.value(), Timestamp::NAT);
assert_eq!(nat.unit(), None);
}
#[test]
fn timestamp_numpy_datetime64_materializers_match_value_twksi() {
let ts = Timestamp::from_nanos(1_000_000_123);
assert_eq!(ts.asm8(), ts.value());
assert_eq!(ts.to_datetime64(), ts.value());
assert_eq!(ts.to_numpy(), ts.value());
let nat = Timestamp::nat();
assert_eq!(nat.asm8(), Timestamp::NAT);
assert_eq!(nat.to_datetime64(), Timestamp::NAT);
assert_eq!(nat.to_numpy(), Timestamp::NAT);
}
#[test]
fn timestamp_timestamp_accessor_matches_pandas_microsecond_rounding_py0h3() {
assert_eq!(Timestamp::from_nanos(0).timestamp(), Ok(0.0));
assert_eq!(Timestamp::from_nanos(1_500_000_000).timestamp(), Ok(1.5));
assert_eq!(Timestamp::from_nanos(500).timestamp(), Ok(0.0));
assert_eq!(Timestamp::from_nanos(501).timestamp(), Ok(0.000001));
assert_eq!(Timestamp::from_nanos(2_500).timestamp(), Ok(0.000003));
assert!(matches!(
Timestamp::from_nanos(-500).timestamp(),
Ok(value) if value == -0.0 && value.is_sign_negative()
));
assert_eq!(Timestamp::from_nanos(-2_500).timestamp(), Ok(-0.000003));
assert_eq!(
Timestamp::nat().timestamp(),
Err(TypeError::ValueIsMissing {
kind: NullKind::NaT,
})
);
}
#[test]
fn timestamp_roundtrips_through_serde_json() {
let naive = Timestamp::from_nanos(1_700_000_000_000_000_000);
let json = serde_json::to_string(&naive).expect("serialize");
let back: Timestamp = serde_json::from_str(&json).expect("deserialize");
assert_eq!(naive, back);
let tz_aware = Timestamp::from_nanos_tz(1_700_000_000_000_000_000, "US/Eastern");
let json = serde_json::to_string(&tz_aware).expect("serialize");
let back: Timestamp = serde_json::from_str(&json).expect("deserialize");
assert_eq!(tz_aware, back);
}
#[test]
fn timestamp_is_send_and_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Timestamp>();
}
#[test]
fn timestamp_pre_epoch_accessors_floor_not_truncate() {
let ts = Timestamp::from_nanos(-1);
assert_eq!(ts.year(), Some(1969));
assert_eq!(ts.month(), Some(12));
assert_eq!(ts.day(), Some(31));
assert_eq!(ts.hour(), Some(23));
assert_eq!(ts.minute(), Some(59));
assert_eq!(ts.second(), Some(59));
assert_eq!(ts.microsecond(), Some(999_999));
assert_eq!(ts.nanosecond(), Some(999));
assert_eq!(ts.dayofweek(), Some(2));
let noon = Timestamp::from_nanos(-43200 * Timedelta::NANOS_PER_SEC);
assert_eq!(noon.year(), Some(1969));
assert_eq!(noon.month(), Some(12));
assert_eq!(noon.day(), Some(31));
assert_eq!(noon.hour(), Some(12));
let midnight = Timestamp::from_nanos(-Timedelta::NANOS_PER_DAY);
assert_eq!(midnight.year(), Some(1969));
assert_eq!(midnight.day(), Some(31));
assert_eq!(midnight.hour(), Some(0));
}
#[test]
fn timestamp_floor_to_rounds_down() {
let h = Timedelta::NANOS_PER_HOUR;
let twelve_h = h * 12;
let twelve_thirty_four =
twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
let ts = Timestamp::from_nanos(twelve_thirty_four);
let floored = ts.floor_to(h);
assert_eq!(floored.nanos, twelve_h);
}
#[test]
fn timestamp_floor_to_handles_already_aligned() {
let h = Timedelta::NANOS_PER_HOUR;
let twelve_h = h * 12;
let ts = Timestamp::from_nanos(twelve_h);
assert_eq!(ts.floor_to(h).nanos, twelve_h);
}
#[test]
fn timestamp_floor_to_handles_negative_nanos() {
let ts = Timestamp::from_nanos(-100);
assert_eq!(ts.floor_to(60).nanos, -120);
}
#[test]
fn timestamp_floor_to_returns_nat_on_axis_underflow_30hdi() {
let ts = Timestamp::from_nanos(i64::MIN + 1);
assert!(ts.floor_to(10).is_nat());
let safe = Timestamp::from_nanos(i64::MIN + 10);
assert_eq!(safe.floor_to(10).nanos, i64::MIN + 8);
let tz = Timestamp::from_nanos_tz(-100, "UTC").floor_to(60);
assert_eq!(tz.nanos, -120);
assert_eq!(tz.tz.as_deref(), Some("UTC"));
}
#[test]
fn timestamp_ceil_to_rounds_up() {
let h = Timedelta::NANOS_PER_HOUR;
let twelve_h = h * 12;
let thirteen_h = h * 13;
let twelve_thirty_four =
twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
let ts = Timestamp::from_nanos(twelve_thirty_four);
assert_eq!(ts.ceil_to(h).nanos, thirteen_h);
}
#[test]
fn timestamp_ceil_to_no_op_on_aligned() {
let h = Timedelta::NANOS_PER_HOUR;
let twelve_h = h * 12;
let ts = Timestamp::from_nanos(twelve_h);
assert_eq!(ts.ceil_to(h).nanos, twelve_h);
}
#[test]
fn timestamp_round_to_rounds_to_nearest() {
let h = Timedelta::NANOS_PER_HOUR;
let twelve_h = h * 12;
let twelve_thirty_one_sec =
twelve_h + Timedelta::NANOS_PER_MIN * 30 + Timedelta::NANOS_PER_SEC;
let ts = Timestamp::from_nanos(twelve_thirty_one_sec);
assert_eq!(ts.round_to(h).nanos, h * 13);
let twelve_twenty_nine_sec =
twelve_h + Timedelta::NANOS_PER_MIN * 29 + Timedelta::NANOS_PER_SEC * 59;
let ts = Timestamp::from_nanos(twelve_twenty_nine_sec);
assert_eq!(ts.round_to(h).nanos, twelve_h);
}
#[test]
fn timestamp_round_to_bankers_tie_to_even() {
assert_eq!(Timestamp::from_nanos(5).round_to(10).nanos, 0);
assert_eq!(Timestamp::from_nanos(15).round_to(10).nanos, 20);
assert_eq!(Timestamp::from_nanos(25).round_to(10).nanos, 20);
assert_eq!(Timestamp::from_nanos(35).round_to(10).nanos, 40);
}
#[test]
fn timestamp_round_to_zero_unit_returns_nat() {
let ts = Timestamp::from_nanos(100);
assert!(ts.round_to(0).is_nat());
assert!(ts.floor_to(0).is_nat());
assert!(ts.ceil_to(0).is_nat());
}
#[test]
fn timestamp_round_to_negative_unit_returns_nat() {
let ts = Timestamp::from_nanos(100);
assert!(ts.round_to(-10).is_nat());
assert!(ts.floor_to(-10).is_nat());
assert!(ts.ceil_to(-10).is_nat());
}
#[test]
fn timestamp_rounding_propagates_nat() {
let nat = Timestamp::nat();
assert!(nat.floor_to(60).is_nat());
assert!(nat.ceil_to(60).is_nat());
assert!(nat.round_to(60).is_nat());
}
#[test]
fn timestamp_rounding_preserves_tz() {
let ts = Timestamp::from_nanos_tz(100, "US/Eastern");
assert_eq!(ts.floor_to(60).tz.as_deref(), Some("US/Eastern"));
assert_eq!(ts.ceil_to(60).tz.as_deref(), Some("US/Eastern"));
assert_eq!(ts.round_to(60).tz.as_deref(), Some("US/Eastern"));
}
#[test]
fn timestamp_floor_to_unit_h_rounds_to_hour() {
let h = Timedelta::NANOS_PER_HOUR;
let twelve_h = h * 12;
let twelve_thirty_four =
twelve_h + Timedelta::NANOS_PER_MIN * 34 + Timedelta::NANOS_PER_SEC * 56;
let ts = Timestamp::from_nanos(twelve_thirty_four);
assert_eq!(ts.floor_to_unit("H").nanos, twelve_h);
assert_eq!(ts.floor_to_unit("h").nanos, twelve_h);
assert_eq!(ts.floor_to_unit("hour").nanos, twelve_h);
assert_eq!(ts.floor_to_unit("hours").nanos, twelve_h);
assert_eq!(ts.floor_to_unit("hr").nanos, twelve_h);
}
#[test]
fn timestamp_ceil_to_unit_d_rounds_to_day() {
let h = Timedelta::NANOS_PER_HOUR;
let d = Timedelta::NANOS_PER_DAY;
let twelve_thirty_four = h * 12 + Timedelta::NANOS_PER_MIN * 34;
let ts = Timestamp::from_nanos(twelve_thirty_four);
assert_eq!(ts.ceil_to_unit("D").nanos, d);
assert_eq!(ts.ceil_to_unit("day").nanos, d);
assert_eq!(ts.ceil_to_unit("days").nanos, d);
}
#[test]
fn timestamp_round_to_unit_min_rounds_to_minute() {
let m = Timedelta::NANOS_PER_MIN;
let twelve_thirty_four_thirty_one =
Timedelta::NANOS_PER_HOUR * 12 + m * 34 + Timedelta::NANOS_PER_SEC * 31;
let ts = Timestamp::from_nanos(twelve_thirty_four_thirty_one);
let expected = Timedelta::NANOS_PER_HOUR * 12 + m * 35;
assert_eq!(ts.round_to_unit("min").nanos, expected);
assert_eq!(ts.round_to_unit("T").nanos, expected); assert_eq!(ts.round_to_unit("minute").nanos, expected);
}
#[test]
fn timestamp_floor_ceil_round_aliases_match_unit_methods_li897() {
let ts = Timestamp::from_nanos(
Timedelta::NANOS_PER_HOUR * 12
+ Timedelta::NANOS_PER_MIN * 34
+ Timedelta::NANOS_PER_SEC * 31,
);
assert_eq!(ts.floor("H"), ts.floor_to_unit("H"));
assert_eq!(ts.ceil("D"), ts.ceil_to_unit("D"));
assert_eq!(ts.round("min"), ts.round_to_unit("min"));
}
#[test]
fn timestamp_normalize_floors_to_day_and_preserves_tz_455op() {
let ts = Timestamp::from_nanos_tz(
Timedelta::NANOS_PER_DAY * 3
+ Timedelta::NANOS_PER_HOUR * 12
+ Timedelta::NANOS_PER_MIN * 34,
"US/Eastern",
);
let normalized = ts.normalize();
assert_eq!(normalized.nanos, Timedelta::NANOS_PER_DAY * 3);
assert_eq!(normalized.tz.as_deref(), Some("US/Eastern"));
assert!(Timestamp::nat().normalize().is_nat());
}
#[test]
fn timestamp_unit_rounding_unknown_unit_returns_nat() {
let ts = Timestamp::from_nanos(100);
assert!(ts.floor_to_unit("fortnight").is_nat());
assert!(ts.ceil_to_unit("century").is_nat());
assert!(ts.round_to_unit("xyz").is_nat());
}
#[test]
fn timestamp_unit_rounding_propagates_nat() {
let nat = Timestamp::nat();
assert!(nat.floor_to_unit("H").is_nat());
assert!(nat.ceil_to_unit("H").is_nat());
assert!(nat.round_to_unit("H").is_nat());
}
#[test]
fn timestamp_unit_rounding_preserves_tz() {
let ts = Timestamp::from_nanos_tz(Timedelta::NANOS_PER_HOUR * 12 + 100, "US/Eastern");
assert_eq!(ts.floor_to_unit("H").tz.as_deref(), Some("US/Eastern"));
assert_eq!(ts.ceil_to_unit("H").tz.as_deref(), Some("US/Eastern"));
assert_eq!(ts.round_to_unit("H").tz.as_deref(), Some("US/Eastern"));
}
#[test]
fn timedelta_unit_to_nanos_is_now_public_and_matches_pandas_aliases() {
assert_eq!(
Timedelta::unit_to_nanos("W"),
Some(Timedelta::NANOS_PER_WEEK)
);
assert_eq!(
Timedelta::unit_to_nanos("D"),
Some(Timedelta::NANOS_PER_DAY)
);
assert_eq!(
Timedelta::unit_to_nanos("H"),
Some(Timedelta::NANOS_PER_HOUR)
);
assert_eq!(
Timedelta::unit_to_nanos("min"),
Some(Timedelta::NANOS_PER_MIN)
);
assert_eq!(
Timedelta::unit_to_nanos("s"),
Some(Timedelta::NANOS_PER_SEC)
);
assert_eq!(
Timedelta::unit_to_nanos("ms"),
Some(Timedelta::NANOS_PER_MILLI)
);
assert_eq!(
Timedelta::unit_to_nanos("us"),
Some(Timedelta::NANOS_PER_MICRO)
);
assert_eq!(Timedelta::unit_to_nanos("ns"), Some(1));
assert_eq!(Timedelta::unit_to_nanos(""), Some(Timedelta::NANOS_PER_DAY));
assert_eq!(Timedelta::unit_to_nanos("century"), None);
}
#[test]
fn timestamp_isoformat_basic() {
let ts = Timestamp::from_nanos(0);
assert_eq!(ts.isoformat(), "1970-01-01T00:00:00");
let ts_utc = Timestamp::from_nanos_tz(0, "UTC");
assert_eq!(ts_utc.isoformat(), "1970-01-01T00:00:00+00:00");
let ts_tz = Timestamp::from_nanos_tz(
Timedelta::NANOS_PER_DAY
+ Timedelta::NANOS_PER_HOUR * 14
+ Timedelta::NANOS_PER_MIN * 30,
"America/New_York",
);
assert!(ts_tz.isoformat().contains("1970-01-02T14:30:00"));
assert!(ts_tz.isoformat().contains("[America/New_York]"));
assert_eq!(Timestamp::nat().isoformat(), "NaT");
}
#[test]
fn timestamp_isoformat_pre_epoch_subsecond_uses_floor_day_263m5() {
assert_eq!(
Timestamp::from_nanos(-1).isoformat(),
"1969-12-31T23:59:59.999999999"
);
assert_eq!(
Timestamp::from_nanos(-Timedelta::NANOS_PER_SEC).isoformat(),
"1969-12-31T23:59:59"
);
assert_eq!(
Timestamp::from_nanos(-Timedelta::NANOS_PER_DAY).isoformat(),
"1969-12-31T00:00:00"
);
assert_eq!(
Timestamp::from_nanos_tz(-1, "UTC").isoformat(),
"1969-12-31T23:59:59.999999999+00:00"
);
}
#[test]
fn timestamp_isoformat_preserves_nanosecond_fraction_4r99y() {
assert_eq!(
Timestamp::from_nanos(123_456_789).isoformat(),
"1970-01-01T00:00:00.123456789"
);
assert_eq!(
Timestamp::from_nanos(123_456_000).isoformat(),
"1970-01-01T00:00:00.123456"
);
assert_eq!(
Timestamp::from_nanos(123_000_000).isoformat(),
"1970-01-01T00:00:00.123000"
);
assert_eq!(
Timestamp::from_nanos_tz(1, "UTC").isoformat(),
"1970-01-01T00:00:00.000000001+00:00"
);
}
#[test]
fn timestamp_strftime_basic() {
let ts = Timestamp::from_nanos(
Timedelta::NANOS_PER_DAY * 365
+ Timedelta::NANOS_PER_HOUR * 9
+ Timedelta::NANOS_PER_MIN * 15,
);
assert_eq!(ts.strftime("%Y-%m-%d"), "1971-01-01");
assert_eq!(ts.strftime("%H:%M:%S"), "09:15:00");
assert_eq!(ts.strftime("%Y/%m/%d %H:%M"), "1971/01/01 09:15");
assert_eq!(Timestamp::nat().strftime("%Y-%m-%d"), "NaT");
}
#[test]
fn timestamp_day_name_and_month_name() {
let ts = Timestamp::from_nanos(0);
assert_eq!(ts.day_name(), "Thursday");
assert_eq!(ts.month_name(), "January");
let ts2 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 365);
assert_eq!(ts2.day_name(), "Friday");
assert_eq!(ts2.month_name(), "January");
assert_eq!(Timestamp::nat().day_name(), "NaT");
assert_eq!(Timestamp::nat().month_name(), "NaT");
}
#[test]
fn timestamp_replace_validates_components_zw0y2() {
let ts = Timestamp::parse("2024-01-15T10:30:45.123456789").unwrap();
let replaced = ts.replace(
Some(2024),
Some(2),
Some(29),
Some(23),
Some(59),
Some(58),
Some(987_654),
Some(321),
);
assert_eq!(replaced.year(), Some(2024));
assert_eq!(replaced.month(), Some(2));
assert_eq!(replaced.day(), Some(29));
assert_eq!(replaced.hour(), Some(23));
assert_eq!(replaced.minute(), Some(59));
assert_eq!(replaced.second(), Some(58));
assert_eq!(replaced.microsecond(), Some(987_654));
assert_eq!(replaced.nanosecond(), Some(321));
let tz = Timestamp::from_nanos_tz(ts.nanos, "UTC");
assert_eq!(
tz.replace(None, Some(2), Some(29), None, None, None, None, None)
.tz
.as_deref(),
Some("UTC")
);
assert!(
ts.replace(None, Some(13), None, None, None, None, None, None)
.is_nat()
);
assert!(
ts.replace(Some(2023), Some(2), Some(29), None, None, None, None, None)
.is_nat()
);
assert!(
ts.replace(None, None, None, Some(24), None, None, None, None)
.is_nat()
);
assert!(
ts.replace(None, None, None, None, Some(60), None, None, None)
.is_nat()
);
assert!(
ts.replace(None, None, None, None, None, Some(60), None, None)
.is_nat()
);
assert!(
ts.replace(None, None, None, None, None, None, Some(1_000_000), None)
.is_nat()
);
assert!(
ts.replace(None, None, None, None, None, None, None, Some(1_000))
.is_nat()
);
assert!(
Timestamp::nat()
.replace(Some(2024), Some(1), Some(1), None, None, None, None, None)
.is_nat()
);
}
#[test]
fn timestamp_component_accessors() {
let ts = Timestamp::from_nanos(0);
assert_eq!(ts.year(), Some(1970));
assert_eq!(ts.month(), Some(1));
assert_eq!(ts.day(), Some(1));
assert_eq!(ts.hour(), Some(0));
assert_eq!(ts.minute(), Some(0));
assert_eq!(ts.second(), Some(0));
assert_eq!(ts.microsecond(), Some(0));
assert_eq!(ts.nanosecond(), Some(0));
let ts2 = Timestamp::from_nanos(
Timedelta::NANOS_PER_DAY * 365
+ Timedelta::NANOS_PER_HOUR * 14
+ Timedelta::NANOS_PER_MIN * 30
+ Timedelta::NANOS_PER_SEC * 45
+ 123_456_789,
);
assert_eq!(ts2.year(), Some(1971));
assert_eq!(ts2.month(), Some(1));
assert_eq!(ts2.day(), Some(1));
assert_eq!(ts2.hour(), Some(14));
assert_eq!(ts2.minute(), Some(30));
assert_eq!(ts2.second(), Some(45));
assert_eq!(ts2.microsecond(), Some(123456));
assert_eq!(ts2.nanosecond(), Some(789));
assert_eq!(Timestamp::nat().year(), None);
assert_eq!(Timestamp::nat().month(), None);
assert_eq!(Timestamp::nat().day(), None);
}
#[test]
fn timestamp_dayofweek_dayofyear_quarter() {
let ts = Timestamp::from_nanos(0);
assert_eq!(ts.dayofweek(), Some(3));
assert_eq!(ts.weekday(), Some(3));
assert_eq!(ts.dayofyear(), Some(1));
assert_eq!(ts.quarter(), Some(1));
let ts2 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 90);
assert_eq!(ts2.quarter(), Some(2));
let ts3 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 365);
assert_eq!(ts3.dayofyear(), Some(1));
assert_eq!(ts3.dayofweek(), Some(4));
assert_eq!(Timestamp::nat().dayofweek(), None);
assert_eq!(Timestamp::nat().dayofyear(), None);
assert_eq!(Timestamp::nat().quarter(), None);
}
#[test]
fn timestamp_is_boundary_methods() {
let jan1 = Timestamp::from_nanos(0);
assert_eq!(jan1.is_leap_year(), Some(false));
assert_eq!(jan1.is_month_start(), Some(true));
assert_eq!(jan1.is_month_end(), Some(false));
assert_eq!(jan1.is_quarter_start(), Some(true));
assert_eq!(jan1.is_quarter_end(), Some(false));
assert_eq!(jan1.is_year_start(), Some(true));
assert_eq!(jan1.is_year_end(), Some(false));
let dec31 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 364);
assert_eq!(dec31.is_month_start(), Some(false));
assert_eq!(dec31.is_month_end(), Some(true));
assert_eq!(dec31.is_quarter_end(), Some(true));
assert_eq!(dec31.is_year_end(), Some(true));
assert_eq!(Timestamp::nat().is_leap_year(), None);
assert_eq!(Timestamp::nat().is_month_start(), None);
}
#[test]
fn timestamp_days_in_month() {
let jan = Timestamp::from_nanos(0);
assert_eq!(jan.days_in_month(), Some(31));
assert_eq!(jan.daysinmonth(), Some(31));
let feb_non_leap = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 31);
assert_eq!(feb_non_leap.days_in_month(), Some(28));
assert_eq!(Timestamp::nat().days_in_month(), None);
}
#[test]
fn timestamp_weekofyear() {
let jan1 = Timestamp::from_nanos(0);
assert_eq!(jan1.weekofyear(), Some(1));
assert_eq!(jan1.week(), Some(1));
let jan8 = Timestamp::from_nanos(Timedelta::NANOS_PER_DAY * 7);
assert_eq!(jan8.weekofyear(), Some(2));
assert_eq!(Timestamp::nat().weekofyear(), None);
assert_eq!(Timestamp::nat().week(), None);
}
#[test]
fn timestamp_weekofyear_iso_53_week_boundaries() {
fn week_of(date_days: i64) -> Option<i64> {
Timestamp::from_nanos(date_days * Timedelta::NANOS_PER_DAY).weekofyear()
}
assert_eq!(week_of(18_628), Some(53)); assert_eq!(week_of(16_801), Some(53)); assert_eq!(week_of(20_818), Some(53)); assert_eq!(week_of(18_627), Some(53)); assert_eq!(week_of(19_358), Some(52)); assert_eq!(week_of(20_087), Some(1)); assert_eq!(week_of(18_260), Some(1)); }
#[test]
fn iso_weeks_in_year_53_week_years() {
use super::iso_weeks_in_year;
for y in [2004, 2009, 2015, 2020, 2026] {
assert_eq!(iso_weeks_in_year(y), 53, "{y} should have 53 ISO weeks");
}
for y in [2018, 2019, 2021, 2022, 2023, 2024] {
assert_eq!(iso_weeks_in_year(y), 52, "{y} should have 52 ISO weeks");
}
}
#[test]
fn timestamp_to_unit() {
let ts = Timestamp::from_nanos(1_000_000_000);
assert_eq!(ts.to_unit("ns"), Some(1_000_000_000));
assert_eq!(ts.to_unit("us"), Some(1_000_000));
assert_eq!(ts.to_unit("ms"), Some(1_000));
assert_eq!(ts.to_unit("s"), Some(1));
assert_eq!(ts.to_unit("invalid"), None);
assert_eq!(Timestamp::nat().to_unit("ns"), None);
}
#[test]
fn timestamp_toordinal() {
let nanos_2026_01_01 = 19723_i64 * 24 * 60 * 60 * 1_000_000_000;
let ts = Timestamp::from_nanos(nanos_2026_01_01);
assert_eq!(ts.toordinal(), Some(738886));
assert_eq!(Timestamp::nat().toordinal(), None);
}
#[test]
fn timestamp_fromordinal() {
let nanos_2026_01_01 = 19723_i64 * 24 * 60 * 60 * 1_000_000_000;
let ts_orig = Timestamp::from_nanos(nanos_2026_01_01);
let ordinal = ts_orig.toordinal().unwrap();
let ts = Timestamp::fromordinal(ordinal);
assert_eq!(ts.year(), ts_orig.year());
assert_eq!(ts.month(), ts_orig.month());
assert_eq!(ts.day(), ts_orig.day());
let nat = Timestamp::fromordinal(0);
assert!(nat.is_nat());
}
#[test]
fn timestamp_fromordinal_guards_nanosecond_overflow_ycvrd() {
const EPOCH_ORDINAL: i64 = 719_163;
let max_day_offset = i64::MAX / Timedelta::NANOS_PER_DAY;
let max_valid_ordinal = EPOCH_ORDINAL + max_day_offset;
let max_valid = Timestamp::fromordinal(max_valid_ordinal);
assert!(!max_valid.is_nat());
assert_eq!(max_valid.nanos, max_day_offset * Timedelta::NANOS_PER_DAY);
assert!(Timestamp::fromordinal(max_valid_ordinal + 1).is_nat());
assert!(Timestamp::fromordinal(i64::MAX).is_nat());
}
#[test]
fn timestamp_ordinal_matches_seeded_epoch_oracle_l2f0p() {
const EPOCH_ORDINAL: i64 = 719_163;
const DAY: i64 = Timedelta::NANOS_PER_DAY;
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(3202034522624059733)
.wrapping_add(4354685564936845319);
*seed
}
fn assert_ordinal_case(case: usize, day_offset: i64, subday_nanos: i64) {
let nanos = day_offset.saturating_mul(DAY).saturating_add(subday_nanos);
let ts = Timestamp::from_nanos(nanos);
let expected_day_offset = nanos.div_euclid(DAY);
let expected_ordinal = EPOCH_ORDINAL + expected_day_offset;
assert_eq!(
ts.toordinal(),
Some(expected_ordinal),
"case {case}: toordinal"
);
let midnight = Timestamp::fromordinal(expected_ordinal);
assert_eq!(
midnight.nanos,
expected_day_offset * DAY,
"case {case}: fromordinal nanos"
);
assert_eq!(
midnight.toordinal(),
Some(expected_ordinal),
"case {case}: fromordinal roundtrip"
);
}
assert_eq!(Timestamp::nat().toordinal(), None);
assert!(Timestamp::fromordinal(0).is_nat());
assert!(Timestamp::fromordinal(-1).is_nat());
assert_ordinal_case(usize::MAX, -1, DAY - 1);
assert_ordinal_case(usize::MAX - 1, 0, -1);
assert_ordinal_case(usize::MAX - 2, 19_723, 0);
let mut seed = 0x1f20_f0d1_0a11_0d1e_u64;
for case in 0..260 {
let day_offset = (next(&mut seed) % 40_001) as i64 - 10_000;
let subday_nanos = match case % 7 {
0 => 0,
1 => DAY - 1,
2 => -1,
_ => (next(&mut seed) % (2 * DAY as u64 - 1)) as i64 - (DAY - 1),
};
assert_ordinal_case(case, day_offset, subday_nanos);
}
}
#[test]
fn timestamp_parse_iso8601_date_only() {
let ts = Timestamp::parse("2024-01-15").unwrap();
assert_eq!(ts.year(), Some(2024));
assert_eq!(ts.month(), Some(1));
assert_eq!(ts.day(), Some(15));
assert_eq!(ts.hour(), Some(0));
assert_eq!(ts.minute(), Some(0));
assert_eq!(ts.second(), Some(0));
}
#[test]
fn timestamp_parse_iso8601_datetime() {
let ts = Timestamp::parse("2024-01-15T10:30:45").unwrap();
assert_eq!(ts.year(), Some(2024));
assert_eq!(ts.month(), Some(1));
assert_eq!(ts.day(), Some(15));
assert_eq!(ts.hour(), Some(10));
assert_eq!(ts.minute(), Some(30));
assert_eq!(ts.second(), Some(45));
}
#[test]
fn timestamp_parse_space_separator() {
let ts = Timestamp::parse("2024-01-15 10:30:45").unwrap();
assert_eq!(ts.year(), Some(2024));
assert_eq!(ts.hour(), Some(10));
}
#[test]
fn timestamp_parse_with_fractional_seconds() {
let ts = Timestamp::parse("2024-01-15T10:30:45.123456789").unwrap();
assert_eq!(ts.second(), Some(45));
assert_eq!(ts.microsecond(), Some(123456));
assert_eq!(ts.nanosecond(), Some(789));
}
#[test]
fn timestamp_parse_utc_timezone() {
let ts = Timestamp::parse("2024-01-15T10:30:45Z").unwrap();
assert_eq!(ts.tz, Some("UTC".to_string()));
}
#[test]
fn timestamp_parse_offset_timezone() {
let ts = Timestamp::parse("2024-01-15T10:30:45+05:30").unwrap();
assert_eq!(ts.tz, Some("+05:30".to_string()));
let ts = Timestamp::parse("2024-01-15T10:30:45-05:30").unwrap();
assert_eq!(ts.tz, Some("-05:30".to_string()));
}
#[test]
fn timestamp_parse_rejects_invalid_timezone_offsets_0v676() {
assert!(Timestamp::parse("2024-01-15T10:30:45+bad").is_err());
assert!(Timestamp::parse("2024-01-15T10:30:45+0500").is_err());
assert!(Timestamp::parse("2024-01-15T10:30:45+24:00").is_err());
assert!(Timestamp::parse("2024-01-15T10:30:45+05:60").is_err());
assert!(Timestamp::parse("2024-01-15T10:30:45-25:00").is_err());
}
#[test]
fn timestamp_parse_nat() {
let ts = Timestamp::parse("NaT").unwrap();
assert!(ts.is_nat());
let ts2 = Timestamp::parse("nat").unwrap();
assert!(ts2.is_nat());
}
#[test]
fn timestamp_parse_invalid() {
assert!(Timestamp::parse("not a date").is_err());
assert!(Timestamp::parse("2024-13-01").is_err()); assert!(Timestamp::parse("2024-01-32").is_err()); }
#[test]
fn timestamp_parse_rejects_invalid_fractional_seconds_87se2() {
assert!(Timestamp::parse("2024-01-15T10:30:45.").is_err());
assert!(Timestamp::parse("2024-01-15T10:30:45.abc").is_err());
assert!(Timestamp::parse("2024-01-15T10:30:45.-1").is_err());
assert!(Timestamp::parse("2024-01-15T10:30:45.123").is_err());
let ts = Timestamp::parse("2024-01-15T10:30:45.123456789987").unwrap();
assert_eq!(ts.second(), Some(45));
assert_eq!(ts.microsecond(), Some(123456));
assert_eq!(ts.nanosecond(), Some(789));
}
#[test]
fn timestamp_parse_matches_seeded_iso_component_oracle_1u7a0() {
fn next(seed: &mut u64) -> u64 {
*seed = seed
.wrapping_mul(3935559000370003845)
.wrapping_add(2691343689449507681);
*seed
}
fn leap(year: i64) -> bool {
(year % 4 == 0 && year % 100 != 0) || year % 400 == 0
}
fn month_len(year: i64, month: u32) -> u32 {
match month {
1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
4 | 6 | 9 | 11 => 30,
2 if leap(year) => 29,
2 => 28,
_ => 0,
}
}
struct Components {
year: i64,
month: u32,
day: u32,
hour: u32,
minute: u32,
second: u32,
nanos: u64,
}
fn assert_components(case: usize, text: &str, expected: Components) {
let ts = Timestamp::parse(text).expect("seeded valid timestamp");
let Components {
year,
month,
day,
hour,
minute,
second,
nanos,
} = expected;
assert_eq!(ts.year(), Some(year), "case {case}: year");
assert_eq!(ts.month(), Some(month as i64), "case {case}: month");
assert_eq!(ts.day(), Some(day as i64), "case {case}: day");
assert_eq!(ts.hour(), Some(hour as i64), "case {case}: hour");
assert_eq!(ts.minute(), Some(minute as i64), "case {case}: minute");
assert_eq!(ts.second(), Some(second as i64), "case {case}: second");
assert_eq!(
ts.microsecond(),
Some((nanos / 1000) as i64),
"case {case}: microsecond"
);
assert_eq!(
ts.nanosecond(),
Some((nanos % 1000) as i64),
"case {case}: nanosecond"
);
}
assert!(Timestamp::parse("NaT").expect("NaT parses").is_nat());
assert!(
Timestamp::parse("nAt")
.expect("mixed-case NaT parses")
.is_nat()
);
assert!(Timestamp::parse("1900-02-29").is_err());
assert!(Timestamp::parse("2001-04-31").is_err());
assert!(Timestamp::parse("2024-00-15").is_err());
assert_components(
usize::MAX,
"2000-02-29",
Components {
year: 2000,
month: 2,
day: 29,
hour: 0,
minute: 0,
second: 0,
nanos: 0,
},
);
assert_components(
usize::MAX - 1,
"2024-02-29T23:59:59.000000001",
Components {
year: 2024,
month: 2,
day: 29,
hour: 23,
minute: 59,
second: 59,
nanos: 1,
},
);
let mut seed = 0x15e0_1d50_1f0a_cade_u64;
for case in 0..260 {
let year = 1900 + (next(&mut seed) % 201) as i64;
let month = 1 + (next(&mut seed) % 12) as u32;
let day = 1 + (next(&mut seed) % month_len(year, month) as u64) as u32;
let hour = (next(&mut seed) % 24) as u32;
let minute = (next(&mut seed) % 60) as u32;
let second = (next(&mut seed) % 60) as u32;
let nanos = next(&mut seed) % 1_000_000_000;
match case % 4 {
0 => {
let text = format!("{year:04}-{month:02}-{day:02}");
assert_components(
case,
&text,
Components {
year,
month,
day,
hour: 0,
minute: 0,
second: 0,
nanos: 0,
},
);
}
1 => {
let text =
format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}");
assert_components(
case,
&text,
Components {
year,
month,
day,
hour,
minute,
second,
nanos: 0,
},
);
}
2 => {
let text =
format!("{year:04}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}");
assert_components(
case,
&text,
Components {
year,
month,
day,
hour,
minute,
second,
nanos: 0,
},
);
}
_ => {
let text = format!(
"{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{nanos:09}"
);
assert_components(
case,
&text,
Components {
year,
month,
day,
hour,
minute,
second,
nanos,
},
);
}
}
}
}
#[test]
fn period_parse_annual() {
let p = Period::parse("2024").unwrap();
assert_eq!(p.freq(), PeriodFreq::Annual);
assert_eq!(p.ordinal(), 2024 - 1970);
}
#[test]
fn period_parse_quarterly() {
let p = Period::parse("2024Q1").unwrap();
assert_eq!(p.freq(), PeriodFreq::Quarterly);
assert_eq!(p.ordinal(), (2024 - 1970) * 4);
let p2 = Period::parse("2024q3").unwrap();
assert_eq!(p2.freq(), PeriodFreq::Quarterly);
assert_eq!(p2.ordinal(), (2024 - 1970) * 4 + 2);
}
#[test]
fn period_parse_monthly() {
let p = Period::parse("2024-01").unwrap();
assert_eq!(p.freq(), PeriodFreq::Monthly);
assert_eq!(p.ordinal(), (2024 - 1970) * 12);
let p2 = Period::parse("2024-12").unwrap();
assert_eq!(p2.freq(), PeriodFreq::Monthly);
assert_eq!(p2.ordinal(), (2024 - 1970) * 12 + 11);
}
#[test]
fn period_parse_nat() {
let p = Period::parse("NaT").unwrap();
assert_eq!(p.ordinal(), i64::MIN);
}
#[test]
fn period_parse_invalid() {
assert!(Period::parse("not a period").is_err());
assert!(Period::parse("2024Q5").is_err()); assert!(Period::parse("2024-13").is_err()); }
#[test]
fn interval_parse_basic() {
let i = Interval::parse("[0, 1]").unwrap();
assert_eq!(i.left, 0.0);
assert_eq!(i.right, 1.0);
assert_eq!(i.closed, IntervalClosed::Both);
let i2 = Interval::parse("(0, 1)").unwrap();
assert_eq!(i2.left, 0.0);
assert_eq!(i2.right, 1.0);
assert_eq!(i2.closed, IntervalClosed::Neither);
let i3 = Interval::parse("[0, 1)").unwrap();
assert_eq!(i3.closed, IntervalClosed::Left);
let i4 = Interval::parse("(0, 1]").unwrap();
assert_eq!(i4.closed, IntervalClosed::Right);
}
#[test]
fn interval_parse_floats() {
let i = Interval::parse("[-1.5, 2.5)").unwrap();
assert_eq!(i.left, -1.5);
assert_eq!(i.right, 2.5);
assert_eq!(i.closed, IntervalClosed::Left);
}
#[test]
fn interval_parse_invalid() {
assert!(Interval::parse("invalid").is_err());
assert!(Interval::parse("[0]").is_err());
assert!(Interval::parse("0, 1").is_err()); }
}