use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use chrono::{DateTime, NaiveDate, NaiveDateTime, TimeZone, Utc};
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use crate::schema::derive_macro_utils::ToDataType;
use crate::schema::{ArrayType, DataType, DecimalType, MapType, PrimitiveType, StructField};
use crate::utils::require;
use crate::{DeltaResult, Error};
/// A decimal value held as an unscaled integer plus the [`DecimalType`]
/// (precision and scale) that tells readers how to interpret it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DecimalData {
    /// Unscaled integer representation of the value.
    bits: i128,
    /// Precision and scale the value was validated against.
    ty: DecimalType,
}

impl DecimalData {
    /// Builds a decimal from its unscaled integer `bits` and its type.
    ///
    /// # Errors
    ///
    /// Returns an invalid-decimal error when `bits` needs more decimal digits
    /// than `ty.precision()` provides.
    pub fn try_new(bits: impl Into<i128>, ty: DecimalType) -> DeltaResult<Self> {
        let bits: i128 = bits.into();
        let allowed = ty.precision();
        require!(
            get_decimal_precision(bits) <= allowed,
            Error::invalid_decimal(format!(
                "Decimal value {} exceeds precision {}",
                bits, allowed
            ))
        );
        Ok(Self { bits, ty })
    }

    /// Returns the unscaled integer value.
    pub fn bits(&self) -> i128 {
        self.bits
    }

    /// Returns the decimal type (precision and scale) of this value.
    pub fn ty(&self) -> &DecimalType {
        &self.ty
    }

    /// Returns the precision of the underlying decimal type.
    pub fn precision(&self) -> u8 {
        self.ty().precision()
    }

    /// Returns the scale of the underlying decimal type.
    pub fn scale(&self) -> u8 {
        self.ty().scale()
    }
}
/// Returns the number of decimal digits needed to write `value`, ignoring its sign.
///
/// Zero is reported as needing `0` digits.
fn get_decimal_precision(value: i128) -> u8 {
    match value.unsigned_abs().checked_ilog10() {
        // `ilog10` of a u128 is at most 38, so `log + 1` always fits in a u8.
        Some(log) => (log + 1) as u8,
        // `checked_ilog10` only returns `None` for zero.
        None => 0,
    }
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ArrayData {
tpe: ArrayType,
elements: Vec<Scalar>,
}
impl ArrayData {
pub fn try_new(
tpe: ArrayType,
elements: impl IntoIterator<Item = impl Into<Scalar>>,
) -> DeltaResult<Self> {
let elements = elements
.into_iter()
.map(|v| {
let v = v.into();
if !tpe.contains_null() && v.is_null() {
Err(Error::schema(
"Array element cannot be null for non-nullable array",
))
} else if *tpe.element_type() != v.data_type() {
Err(Error::Schema(format!(
"Array scalar type mismatch: expected {}, got {}",
tpe.element_type(),
v.data_type()
)))
} else {
Ok(v)
}
})
.try_collect()?;
Ok(Self { tpe, elements })
}
pub fn array_type(&self) -> &ArrayType {
&self.tpe
}
pub fn array_elements(&self) -> &[Scalar] {
&self.elements
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MapData {
data_type: MapType,
pairs: Vec<(Scalar, Scalar)>,
}
impl MapData {
pub fn try_new(
data_type: MapType,
values: impl IntoIterator<Item = (impl Into<Scalar>, impl Into<Scalar>)>,
) -> DeltaResult<Self> {
let key_type = data_type.key_type();
let val_type = data_type.value_type();
let pairs = values
.into_iter()
.map(|(key, val)| {
let (k, v) = (key.into(), val.into());
if k.data_type() != *key_type {
Err(Error::Schema(format!(
"Map scalar type mismatch: expected key type {}, got key type {}",
key_type,
k.data_type()
)))
} else if k.is_null() {
Err(Error::schema("Map key cannot be null"))
} else if v.data_type() != *val_type {
Err(Error::Schema(format!(
"Map scalar type mismatch: expected value type {}, got value type {}",
val_type,
v.data_type()
)))
} else if v.is_null() && !data_type.value_contains_null {
Err(Error::schema(
"Null map value disallowed if map value_contains_null is false",
))
} else {
Ok((k, v))
}
})
.try_collect()?;
Ok(Self { data_type, pairs })
}
pub fn pairs(&self) -> &[(Scalar, Scalar)] {
&self.pairs
}
pub fn map_type(&self) -> &MapType {
&self.data_type
}
}
/// A struct value: parallel lists of fields and the value held by each field.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct StructData {
    /// Field definitions, in order.
    fields: Vec<StructField>,
    /// One value per field, in the same order as `fields`.
    values: Vec<Scalar>,
}

impl StructData {
    /// Builds a struct value after checking `values` against `fields`.
    ///
    /// # Errors
    ///
    /// Returns an invalid-struct-data error when the two lists differ in
    /// length, when a value's data type differs from its field's data type, or
    /// when a null value is supplied for a non-nullable field.
    pub fn try_new(fields: Vec<StructField>, values: Vec<Scalar>) -> DeltaResult<Self> {
        let (expected_len, actual_len) = (fields.len(), values.len());
        require!(
            expected_len == actual_len,
            Error::invalid_struct_data(format!(
                "Incorrect number of values for Struct fields, expected {} got {}",
                expected_len, actual_len
            ))
        );

        for (field, value) in fields.iter().zip(values.iter()) {
            let value_type = value.data_type();
            // The type check runs before the nullability check.
            require!(
                *field.data_type() == value_type,
                Error::invalid_struct_data(format!(
                    "Incorrect datatype for Struct field {:?}, expected {} got {}",
                    field.name(),
                    field.data_type(),
                    value_type
                ))
            );
            require!(
                !value.is_null() || field.is_nullable(),
                Error::invalid_struct_data(format!(
                    "Value for non-nullable field {:?} cannot be null, got {}",
                    field.name(),
                    value
                ))
            );
        }

        Ok(Self { fields, values })
    }

    /// Returns the struct's field definitions.
    pub fn fields(&self) -> &[StructField] {
        self.fields.as_slice()
    }

    /// Returns the struct's values, in field order.
    pub fn values(&self) -> &[Scalar] {
        self.values.as_slice()
    }
}
/// A single typed value. Every variant maps to exactly one `DataType`
/// (see `Scalar::data_type`).
///
/// The derived `PartialEq` compares variants structurally; `logical_partial_cmp`
/// and `logical_eq` provide the comparison that treats nulls and complex types
/// as incomparable.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum Scalar {
/// 32-bit signed integer.
Integer(i32),
/// 64-bit signed integer.
Long(i64),
/// 16-bit signed integer.
Short(i16),
/// 8-bit signed integer.
Byte(i8),
/// 32-bit floating point number.
Float(f32),
/// 64-bit floating point number.
Double(f64),
/// Owned string value.
String(String),
/// Boolean value.
Boolean(bool),
/// Timestamp; the constructors and parser in this file store microseconds
/// since the Unix epoch.
Timestamp(i64),
/// Timestamp mapped to `DataType::TIMESTAMP_NTZ`; the parser in this file
/// stores microseconds since the Unix epoch.
/// NOTE(review): presumably "no time zone" semantics — confirm against the schema docs.
TimestampNtz(i64),
/// Date; the parser in this file stores whole days since the Unix epoch.
Date(i32),
/// Raw bytes.
Binary(Vec<u8>),
/// Decimal value with its precision and scale.
Decimal(DecimalData),
/// A null value that still carries the data type it stands in for.
Null(DataType),
/// Struct value (fields plus one value per field).
Struct(StructData),
/// Array value.
Array(ArrayData),
/// Map value.
Map(MapData),
}
impl Scalar {
pub fn data_type(&self) -> DataType {
match self {
Self::Integer(_) => DataType::INTEGER,
Self::Long(_) => DataType::LONG,
Self::Short(_) => DataType::SHORT,
Self::Byte(_) => DataType::BYTE,
Self::Float(_) => DataType::FLOAT,
Self::Double(_) => DataType::DOUBLE,
Self::String(_) => DataType::STRING,
Self::Boolean(_) => DataType::BOOLEAN,
Self::Timestamp(_) => DataType::TIMESTAMP,
Self::TimestampNtz(_) => DataType::TIMESTAMP_NTZ,
Self::Date(_) => DataType::DATE,
Self::Binary(_) => DataType::BINARY,
Self::Decimal(d) => DataType::from(*d.ty()),
Self::Null(data_type) => data_type.clone(),
Self::Struct(data) => DataType::struct_type_unchecked(data.fields.clone()),
Self::Array(data) => data.tpe.clone().into(),
Self::Map(data) => data.data_type.clone().into(),
}
}
pub fn is_null(&self) -> bool {
matches!(self, Self::Null(_))
}
pub fn decimal(bits: impl Into<i128>, precision: u8, scale: u8) -> DeltaResult<Self> {
let dtype = DecimalType::try_new(precision, scale)?;
let dval = DecimalData::try_new(bits, dtype)?;
Ok(Self::Decimal(dval))
}
pub(crate) fn timestamp_from_millis(millis: i64) -> DeltaResult<Self> {
let Some(timestamp) = DateTime::from_timestamp_millis(millis) else {
return Err(Error::generic(format!(
"Failed to create millisecond timestamp from {millis}"
)));
};
Ok(Self::Timestamp(timestamp.timestamp_micros()))
}
pub fn try_add(&self, other: &Scalar) -> Option<Scalar> {
use Scalar::*;
let result = match (self, other) {
(Integer(a), Integer(b)) => Integer(a.checked_add(*b)?),
(Long(a), Long(b)) => Long(a.checked_add(*b)?),
(Short(a), Short(b)) => Short(a.checked_add(*b)?),
(Byte(a), Byte(b)) => Byte(a.checked_add(*b)?),
_ => return None,
};
Some(result)
}
pub fn try_sub(&self, other: &Scalar) -> Option<Scalar> {
use Scalar::*;
let result = match (self, other) {
(Integer(a), Integer(b)) => Integer(a.checked_sub(*b)?),
(Long(a), Long(b)) => Long(a.checked_sub(*b)?),
(Short(a), Short(b)) => Short(a.checked_sub(*b)?),
(Byte(a), Byte(b)) => Byte(a.checked_sub(*b)?),
_ => return None,
};
Some(result)
}
pub fn try_mul(&self, other: &Scalar) -> Option<Scalar> {
use Scalar::*;
let result = match (self, other) {
(Integer(a), Integer(b)) => Integer(a.checked_mul(*b)?),
(Long(a), Long(b)) => Long(a.checked_mul(*b)?),
(Short(a), Short(b)) => Short(a.checked_mul(*b)?),
(Byte(a), Byte(b)) => Byte(a.checked_mul(*b)?),
_ => return None,
};
Some(result)
}
pub fn try_div(&self, other: &Scalar) -> Option<Scalar> {
use Scalar::*;
let result = match (self, other) {
(Integer(a), Integer(b)) => Integer(a.checked_div(*b)?),
(Long(a), Long(b)) => Long(a.checked_div(*b)?),
(Short(a), Short(b)) => Short(a.checked_div(*b)?),
(Byte(a), Byte(b)) => Byte(a.checked_div(*b)?),
_ => return None,
};
Some(result)
}
}
impl Display for Scalar {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::Integer(i) => write!(f, "{i}"),
Self::Long(i) => write!(f, "{i}"),
Self::Short(i) => write!(f, "{i}"),
Self::Byte(i) => write!(f, "{i}"),
Self::Float(fl) => write!(f, "{fl}"),
Self::Double(fl) => write!(f, "{fl}"),
Self::String(s) => write!(f, "'{s}'"),
Self::Boolean(b) => write!(f, "{b}"),
Self::Timestamp(ts) => write!(f, "{ts}"),
Self::TimestampNtz(ts) => write!(f, "{ts}"),
Self::Date(d) => write!(f, "{d}"),
Self::Binary(b) => write!(f, "{b:?}"),
Self::Decimal(d) => match d.scale().cmp(&0) {
Ordering::Equal => {
write!(f, "{}", d.bits())
}
Ordering::Greater => {
let scale = d.scale();
let scalar_multiple = 10_i128.pow(scale as u32);
let value = d.bits();
write!(f, "{}", value / scalar_multiple)?;
write!(f, ".")?;
write!(
f,
"{:0>scale$}",
value % scalar_multiple,
scale = scale as usize
)
}
Ordering::Less => {
write!(f, "{}", d.bits())?;
for _ in 0..d.scale() {
write!(f, "0")?;
}
Ok(())
}
},
Self::Null(_) => write!(f, "null"),
Self::Struct(data) => {
write!(f, "{{")?;
let mut delim = "";
for (value, field) in data.values.iter().zip(data.fields.iter()) {
write!(f, "{delim}{}: {value}", field.name)?;
delim = ", ";
}
write!(f, "}}")
}
Self::Array(data) => {
write!(f, "(")?;
let mut delim = "";
for element in &data.elements {
write!(f, "{delim}{element}")?;
delim = ", ";
}
write!(f, ")")
}
Self::Map(data) => {
write!(f, "{{")?;
let mut delim = "";
for (key, val) in &data.pairs {
write!(f, "{delim}{key}: {val}")?;
delim = ", ";
}
write!(f, "}}")
}
}
}
}
impl Scalar {
    /// Returns `true` when `logical_partial_cmp` reports the two scalars as equal.
    ///
    /// Nulls, structs, arrays and maps never compare equal under this
    /// definition, not even to themselves.
    pub fn logical_eq(&self, other: &Self) -> bool {
        matches!(self.logical_partial_cmp(other), Some(Ordering::Equal))
    }

    /// Structural equality, as implemented by the derived `PartialEq`.
    pub fn physical_eq(&self, other: &Self) -> bool {
        *self == *other
    }

    /// Compares two scalars of the same primitive variant.
    ///
    /// Returns `None` when the variants differ, when either side is `Null`,
    /// `Struct`, `Array` or `Map`, when two decimals have different decimal
    /// types, or when the underlying values are themselves incomparable
    /// (for example a floating point NaN).
    pub fn logical_partial_cmp(&self, other: &Self) -> Option<Ordering> {
        use Scalar::*;
        match (self, other) {
            (Integer(lhs), Integer(rhs)) => lhs.partial_cmp(rhs),
            (Long(lhs), Long(rhs)) => lhs.partial_cmp(rhs),
            (Short(lhs), Short(rhs)) => lhs.partial_cmp(rhs),
            (Byte(lhs), Byte(rhs)) => lhs.partial_cmp(rhs),
            (Float(lhs), Float(rhs)) => lhs.partial_cmp(rhs),
            (Double(lhs), Double(rhs)) => lhs.partial_cmp(rhs),
            (String(lhs), String(rhs)) => lhs.partial_cmp(rhs),
            (Boolean(lhs), Boolean(rhs)) => lhs.partial_cmp(rhs),
            (Timestamp(lhs), Timestamp(rhs)) => lhs.partial_cmp(rhs),
            (TimestampNtz(lhs), TimestampNtz(rhs)) => lhs.partial_cmp(rhs),
            (Date(lhs), Date(rhs)) => lhs.partial_cmp(rhs),
            (Binary(lhs), Binary(rhs)) => lhs.partial_cmp(rhs),
            (Decimal(lhs), Decimal(rhs)) => {
                if lhs.ty() == rhs.ty() {
                    lhs.bits().partial_cmp(&rhs.bits())
                } else {
                    None
                }
            }
            // Every left-hand variant is listed explicitly (no bare `_`) so that
            // adding a new variant to `Scalar` fails to compile here.
            (
                Integer(_) | Long(_) | Short(_) | Byte(_) | Float(_) | Double(_) | String(_)
                | Boolean(_) | Timestamp(_) | TimestampNtz(_) | Date(_) | Binary(_) | Decimal(_)
                | Null(_) | Struct(_) | Array(_) | Map(_),
                _,
            ) => None,
        }
    }
}
impl From<i8> for Scalar {
fn from(i: i8) -> Self {
Self::Byte(i)
}
}
impl From<i16> for Scalar {
fn from(i: i16) -> Self {
Self::Short(i)
}
}
impl From<i32> for Scalar {
fn from(i: i32) -> Self {
Self::Integer(i)
}
}
impl From<i64> for Scalar {
fn from(i: i64) -> Self {
Self::Long(i)
}
}
impl From<f32> for Scalar {
fn from(i: f32) -> Self {
Self::Float(i)
}
}
impl From<f64> for Scalar {
fn from(i: f64) -> Self {
Self::Double(i)
}
}
impl From<bool> for Scalar {
fn from(b: bool) -> Self {
Self::Boolean(b)
}
}
impl From<DecimalData> for Scalar {
fn from(d: DecimalData) -> Self {
Self::Decimal(d)
}
}
impl From<&str> for Scalar {
fn from(s: &str) -> Self {
Self::String(s.into())
}
}
impl From<String> for Scalar {
fn from(value: String) -> Self {
Self::String(value)
}
}
impl<T: Into<Scalar> + Copy> From<&T> for Scalar {
fn from(t: &T) -> Self {
(*t).into()
}
}
impl From<&[u8]> for Scalar {
fn from(b: &[u8]) -> Self {
Self::Binary(b.into())
}
}
impl From<bytes::Bytes> for Scalar {
fn from(b: bytes::Bytes) -> Self {
Self::Binary(b.into())
}
}
impl<T> TryFrom<Vec<T>> for Scalar
where
T: Into<Scalar> + ToDataType,
{
type Error = Error;
fn try_from(vec: Vec<T>) -> Result<Self, Self::Error> {
let array_type = ArrayType::new(T::to_data_type(), false);
let array_data = ArrayData::try_new(array_type, vec)?;
Ok(array_data.into())
}
}
impl<T> TryFrom<Vec<Option<T>>> for Scalar
where
T: Into<Scalar> + ToDataType,
{
type Error = Error;
fn try_from(vec: Vec<Option<T>>) -> Result<Self, Self::Error> {
let array_type = ArrayType::new(T::to_data_type(), true);
let array_data = ArrayData::try_new(array_type, vec)?;
Ok(array_data.into())
}
}
impl<T> TryFrom<Option<Vec<T>>> for Scalar
where
T: Into<Scalar> + ToDataType,
{
type Error = Error;
fn try_from(opt: Option<Vec<T>>) -> Result<Self, Self::Error> {
match opt {
Some(vec) => vec.try_into(),
None => Ok(Self::Null(ArrayType::new(T::to_data_type(), false).into())),
}
}
}
impl<K, V> TryFrom<HashMap<K, V>> for Scalar
where
K: Into<Scalar> + ToDataType,
V: Into<Scalar> + ToDataType,
{
type Error = Error;
fn try_from(map: HashMap<K, V>) -> Result<Self, Self::Error> {
let map_type = MapType::new(K::to_data_type(), V::to_data_type(), false);
let map_data = MapData::try_new(map_type, map)?;
Ok(map_data.into())
}
}
impl<K, V> TryFrom<HashMap<K, Option<V>>> for Scalar
where
K: Into<Scalar> + ToDataType,
V: Into<Scalar> + ToDataType,
{
type Error = Error;
fn try_from(map: HashMap<K, Option<V>>) -> Result<Self, Self::Error> {
let map_type = MapType::new(K::to_data_type(), V::to_data_type(), true);
let map_data = MapData::try_new(map_type, map)?;
Ok(map_data.into())
}
}
impl<K, V> TryFrom<Option<HashMap<K, V>>> for Scalar
where
K: Into<Scalar> + ToDataType,
V: Into<Scalar> + ToDataType,
{
type Error = Error;
fn try_from(opt: Option<HashMap<K, V>>) -> Result<Self, Self::Error> {
match opt {
Some(map) => map.try_into(),
None => Ok(Self::Null(
MapType::new(K::to_data_type(), V::to_data_type(), false).into(),
)),
}
}
}
impl<T: Into<Scalar> + ToDataType> From<Option<T>> for Scalar {
fn from(t: Option<T>) -> Self {
match t {
Some(t) => t.into(),
None => Self::Null(T::to_data_type()),
}
}
}
impl From<ArrayData> for Scalar {
fn from(array_data: ArrayData) -> Self {
Self::Array(array_data)
}
}
impl From<MapData> for Scalar {
fn from(map_data: MapData) -> Self {
Self::Map(map_data)
}
}
impl PrimitiveType {
    /// Wraps this primitive type in a `DataType`.
    fn data_type(&self) -> DataType {
        DataType::Primitive(self.clone())
    }

    /// Parses `raw` into a scalar of this primitive type.
    ///
    /// An empty string always yields a `Null` scalar of this type. Booleans are
    /// matched case-insensitively against `true`/`false`, dates use
    /// `%Y-%m-%d`, and timestamps use `%Y-%m-%d %H:%M:%S%.f` (plain
    /// `Timestamp` additionally falls back to the `%+` format).
    ///
    /// # Errors
    ///
    /// Returns a parse error naming `raw` and this type when the text cannot be
    /// parsed; decimal parsing may also surface integer-parse or
    /// invalid-decimal errors.
    pub fn parse_scalar(&self, raw: &str) -> Result<Scalar, Error> {
        use PrimitiveType::*;

        if raw.is_empty() {
            return Ok(Scalar::Null(self.data_type()));
        }

        match self {
            String => Ok(Scalar::String(raw.to_string())),
            Binary => Ok(Scalar::Binary(raw.as_bytes().to_vec())),
            Byte => self.parse_str_as_scalar(raw, Scalar::Byte),
            Decimal(dtype) => Self::parse_decimal(raw, *dtype),
            Short => self.parse_str_as_scalar(raw, Scalar::Short),
            Integer => self.parse_str_as_scalar(raw, Scalar::Integer),
            Long => self.parse_str_as_scalar(raw, Scalar::Long),
            Float => self.parse_str_as_scalar(raw, Scalar::Float),
            Double => self.parse_str_as_scalar(raw, Scalar::Double),
            Boolean => {
                if raw.eq_ignore_ascii_case("true") {
                    Ok(Scalar::Boolean(true))
                } else if raw.eq_ignore_ascii_case("false") {
                    Ok(Scalar::Boolean(false))
                } else {
                    Err(self.parse_error(raw))
                }
            }
            Date => {
                let midnight = NaiveDate::parse_from_str(raw, "%Y-%m-%d")
                    .map_err(|_| self.parse_error(raw))?
                    .and_hms_opt(0, 0, 0)
                    .ok_or_else(|| self.parse_error(raw))?;
                let midnight = Utc.from_utc_datetime(&midnight);
                let days = midnight
                    .signed_duration_since(DateTime::UNIX_EPOCH)
                    .num_days();
                // Checked narrowing instead of `as`, so an out-of-range day count
                // becomes a parse error rather than a silently wrapped value.
                let days = i32::try_from(days).map_err(|_| self.parse_error(raw))?;
                Ok(Scalar::Date(days))
            }
            TimestampNtz | Timestamp => {
                let mut timestamp = NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S%.f");
                if timestamp.is_err() && *self == Timestamp {
                    // NOTE(review): chrono documents that `NaiveDateTime::parse_from_str`
                    // ignores any parsed offset, so a non-UTC offset in the input
                    // appears to be treated as UTC here — confirm this is intended.
                    timestamp = NaiveDateTime::parse_from_str(raw, "%+");
                }
                let timestamp = timestamp.map_err(|_| self.parse_error(raw))?;
                let timestamp = Utc.from_utc_datetime(&timestamp);
                let micros = timestamp
                    .signed_duration_since(DateTime::UNIX_EPOCH)
                    .num_microseconds()
                    .ok_or_else(|| self.parse_error(raw))?;
                match self {
                    Timestamp => Ok(Scalar::Timestamp(micros)),
                    TimestampNtz => Ok(Scalar::TimestampNtz(micros)),
                    _ => unreachable!(),
                }
            }
        }
    }

    /// Builds the parse error reported for `raw` under this type.
    fn parse_error(&self, raw: &str) -> Error {
        Error::ParseError(raw.to_string(), self.data_type())
    }

    /// Parses `raw` with `FromStr` and wraps the result using `f`, mapping any
    /// failure to this type's parse error.
    fn parse_str_as_scalar<T: std::str::FromStr>(
        &self,
        raw: &str,
        f: impl FnOnce(T) -> Scalar,
    ) -> Result<Scalar, Error> {
        match raw.parse() {
            Ok(val) => Ok(f(val)),
            Err(..) => Err(self.parse_error(raw)),
        }
    }

    /// Parses a decimal literal, optionally in scientific notation
    /// (`<base>[e|E<exponent>]`), into a `Decimal` scalar of type `dtype`.
    ///
    /// The literal's effective scale (fraction digits minus exponent) must
    /// equal `dtype.scale()` exactly; no rescaling is performed.
    fn parse_decimal(raw: &str, dtype: DecimalType) -> Result<Scalar, Error> {
        // Split off an optional exponent.
        let (base, exp): (&str, i128) = match raw.find(['e', 'E']) {
            None => (raw, 0),
            Some(pos) => {
                let (base, exp) = raw.split_at(pos);
                // `exp` still starts with the one-byte 'e'/'E' marker; skip it.
                (base, exp[1..].parse()?)
            }
        };
        let parse_error = || PrimitiveType::from(dtype).parse_error(raw);
        require!(!base.is_empty(), parse_error());

        // Split the base into integer and fractional digits.
        let (int_part, frac_part, frac_digits) = match base.find('.') {
            None => (base, None, 0),
            // A trailing '.' carries no fractional digits.
            Some(pos) if pos == base.len() - 1 => (&base[..pos], None, 0),
            Some(pos) => {
                let (int_part, frac_part) = (&base[..pos], &base[pos + 1..]);
                (int_part, Some(frac_part), frac_part.len() as i128)
            }
        };

        // An exponent of `i128::MIN` would overflow a plain subtraction and panic
        // in debug builds, so subtract with an explicit overflow check.
        let scale = frac_digits.checked_sub(exp).ok_or_else(parse_error)?;
        let scale: u8 = scale.try_into().map_err(|_| parse_error())?;
        require!(scale == dtype.scale(), parse_error());

        // The unscaled value is the integer and fractional digits concatenated.
        let int: i128 = match frac_part {
            None => int_part.parse()?,
            Some(frac_part) => format!("{int_part}{frac_part}").parse()?,
        };
        Ok(Scalar::Decimal(DecimalData::try_new(int, dtype)?))
    }
}
#[cfg(test)]
mod tests {
use std::f32::consts::PI;
use crate::expressions::{column_expr, BinaryPredicateOp};
use crate::utils::test_utils::assert_result_error_with_message;
use crate::{Expression as Expr, Predicate as Pred};
use super::*;
#[test]
fn test_bad_decimal() {
    // Precision 3 / scale 0 cannot hold any of the values below.
    let three_digits = DecimalType::try_new(3, 0).unwrap();
    DecimalData::try_new(123456789, three_digits).expect_err("should have failed");
    for raw in ["0.12345", "12345"] {
        PrimitiveType::parse_decimal(raw, three_digits).expect_err("should have failed");
    }
}
#[test]
fn test_decimal_display() {
    // Same unscaled value and precision, rendered at different scales.
    let cases = [
        (2, "1234567.89"),
        (0, "123456789"),
        (9, "0.123456789"),
    ];
    for (scale, rendered) in cases {
        let scalar = Scalar::decimal(123456789, 9, scale).unwrap();
        assert_eq!(scalar.to_string(), rendered);
    }
}
/// Parses `raw` as a decimal of the expected precision/scale and checks the
/// resulting unscaled value, precision and scale.
fn assert_decimal(
    raw: &str,
    expect_int: i128,
    expect_prec: u8,
    expect_scale: u8,
) -> Result<(), Box<dyn std::error::Error>> {
    let decimal_type = PrimitiveType::decimal(expect_prec, expect_scale)?;
    let Scalar::Decimal(parsed) = decimal_type.parse_scalar(raw)? else {
        panic!("Didn't parse as decimal");
    };
    assert_eq!(parsed.bits(), expect_int);
    assert_eq!(parsed.precision(), expect_prec);
    assert_eq!(parsed.scale(), expect_scale);
    Ok(())
}
#[test]
fn test_decimal_precision() {
    // (value, expected number of decimal digits)
    let cases: &[(i128, u8)] = &[
        (0, 0),
        (1, 1),
        (9, 1),
        (10, 2),
        (99, 2),
        (100, 3),
        (999, 3),
        (1000, 4),
        (9999, 4),
        (10000, 5),
        (999_9999, 7),
        (1000_0000, 8),
        (9999_9999, 8),
        (1_0000_0000, 9),
        (999_9999_9999_9999, 15),
        (1000_0000_0000_0000, 16),
        (9999_9999_9999_9999, 16),
        (1_0000_0000_0000_0000, 17),
        (999_9999_9999_9999_9999_9999_9999_9999, 31),
        (1000_0000_0000_0000_0000_0000_0000_0000, 32),
        (9999_9999_9999_9999_9999_9999_9999_9999, 32),
        (1_0000_0000_0000_0000_0000_0000_0000_0000, 33),
        (9_9999_9999_9999_9999_9999_9999_9999_9999_9999, 37),
        (10_0000_0000_0000_0000_0000_0000_0000_0000_0000, 38),
        (99_9999_9999_9999_9999_9999_9999_9999_9999_9999, 38),
        (100_0000_0000_0000_0000_0000_0000_0000_0000_0000, 39),
    ];
    for &(value, digits) in cases {
        assert_eq!(get_decimal_precision(value), digits);
    }
}
#[test]
fn test_parse_decimal() -> Result<(), Box<dyn std::error::Error>> {
    // (raw text, expected unscaled value, precision, scale)
    let cases: &[(&str, i128, u8, u8)] = &[
        ("0.999", 999, 3, 3),
        ("0", 0, 1, 0),
        ("0.00", 0, 3, 2),
        ("123", 123, 3, 0),
        ("-123", -123, 3, 0),
        ("-123.", -123, 3, 0),
        ("123000", 123000, 6, 0),
        ("12.0", 120, 3, 1),
        ("12.3", 123, 3, 1),
        ("0.00123", 123, 5, 5),
        ("1234.5E-4", 12345, 5, 5),
        ("-0", 0, 1, 0),
        ("12.000000000000000000", 12000000000000000000, 38, 18),
    ];
    for &(raw, unscaled, precision, scale) in cases {
        assert_decimal(raw, unscaled, precision, scale)?;
    }
    Ok(())
}
/// Asserts that `raw` is rejected when parsed as a decimal of the given
/// precision and scale.
fn expect_fail_parse(raw: &str, prec: u8, scale: u8) {
    let decimal_type = PrimitiveType::decimal(prec, scale).unwrap();
    let outcome = decimal_type.parse_scalar(raw);
    assert!(outcome.is_err(), "Fail on {raw}");
}
#[test]
fn test_parse_decimal_expect_fail() {
    // (raw text, precision, scale) combinations that must all be rejected.
    let cases: &[(&str, u8, u8)] = &[
        ("1.000", 3, 3),
        ("iowjef", 1, 0),
        ("123Ef", 1, 0),
        ("1d2E3", 1, 0),
        ("a", 1, 0),
        ("2.a", 1, 1),
        ("E45", 1, 0),
        ("1.2.3", 1, 0),
        ("1.2E1.3", 1, 0),
        ("123.45", 5, 1),
        (".45", 5, 1),
        ("+", 1, 0),
        ("-", 1, 0),
        ("0.-0", 2, 1),
        ("--1.0", 1, 1),
        ("+-1.0", 1, 1),
        ("-+1.0", 1, 1),
        ("++1.0", 1, 1),
        ("1.0E1+", 1, 1),
        ("0.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999", 1, 0),
        ("0.E170141183460469231731687303715884105727", 1, 0),
    ];
    for &(raw, precision, scale) in cases {
        expect_fail_parse(raw, precision, scale);
    }
}
#[test]
fn test_arrays() {
    // Checks how IN predicates over array literals and columns are rendered.
    let int_array = Scalar::Array(ArrayData {
        tpe: ArrayType::new(DataType::INTEGER, false),
        elements: vec![Scalar::Integer(1), Scalar::Integer(2), Scalar::Integer(3)],
    });
    let item_column = column_expr!("item");

    let in_array = Pred::binary(BinaryPredicateOp::In, Expr::literal(10), int_array.clone());
    let not_in_array = Pred::not(Pred::binary(
        BinaryPredicateOp::In,
        Expr::literal(10),
        int_array,
    ));
    let in_column = Pred::binary(
        BinaryPredicateOp::In,
        Expr::literal(PI),
        item_column.clone(),
    );
    let not_in_column = Pred::not(Pred::binary(
        BinaryPredicateOp::In,
        Expr::literal("Cool"),
        item_column,
    ));

    assert_eq!(in_array.to_string(), "10 IN (1, 2, 3)");
    assert_eq!(not_in_array.to_string(), "NOT(10 IN (1, 2, 3))");
    assert_eq!(in_column.to_string(), "3.1415927 IN Column(item)");
    assert_eq!(not_in_column.to_string(), "NOT('Cool' IN Column(item))");
}
#[test]
fn test_invalid_array() {
    // An element whose type differs from the array's element type is rejected.
    let mixed_types = ArrayData::try_new(
        ArrayType::new(DataType::INTEGER, false),
        [Scalar::Integer(1), Scalar::String("s".to_string())],
    );
    assert_result_error_with_message(
        mixed_types,
        "Schema error: Array scalar type mismatch: expected integer, got string",
    );

    // A null element is rejected when the array type disallows nulls.
    let with_null = ArrayData::try_new(ArrayType::new(DataType::INTEGER, false), [1.into(), None]);
    assert_result_error_with_message(
        with_null,
        "Schema error: Array element cannot be null for non-nullable array",
    );
}
#[test]
fn test_invalid_map() {
    // Key of the wrong type.
    let wrong_key_type = MapData::try_new(
        MapType::new(DataType::STRING, DataType::INTEGER, false),
        [(Scalar::Integer(1), Scalar::String("s".to_string()))],
    );
    assert_result_error_with_message(
        wrong_key_type,
        "Schema error: Map scalar type mismatch: expected key type string, got key type integer",
    );

    // Null key, even though null values are allowed.
    let null_key = MapData::try_new(
        MapType::new(DataType::STRING, DataType::STRING, true),
        [(
            Scalar::Null(DataType::STRING),
            Scalar::String("s".to_string()),
        )],
    );
    assert_result_error_with_message(null_key, "Schema error: Map key cannot be null");

    // Null value in a map that disallows null values.
    let null_value = MapData::try_new(
        MapType::new(DataType::STRING, DataType::STRING, false),
        [(
            Scalar::String("s".to_string()),
            Scalar::Null(DataType::STRING),
        )],
    );
    assert_result_error_with_message(
        null_value,
        "Schema error: Null map value disallowed if map value_contains_null is false",
    );
}
#[test]
fn test_timestamp_parse() {
    // (raw text, expected microseconds since the Unix epoch)
    let cases: &[(&str, i64)] = &[
        ("1971-07-22T03:06:40.678910Z", 49000000678910),
        ("1971-07-22T03:06:40Z", 49000000000000),
        ("2011-01-11 13:06:07", 1294751167000000),
        ("2011-01-11 13:06:07.123456", 1294751167123456),
        ("1970-01-01 00:00:00", 0),
    ];
    for &(raw, micros) in cases {
        let parsed = PrimitiveType::Timestamp.parse_scalar(raw).unwrap();
        assert_eq!(parsed, Scalar::Timestamp(micros));
    }
}
#[test]
fn test_timestamp_ntz_parse() {
    // (raw text, expected microseconds since the Unix epoch)
    let cases: &[(&str, i64)] = &[
        ("2011-01-11 13:06:07", 1294751167000000),
        ("2011-01-11 13:06:07.123456", 1294751167123456),
        ("1970-01-01 00:00:00", 0),
    ];
    for &(raw, micros) in cases {
        let parsed = PrimitiveType::TimestampNtz.parse_scalar(raw).unwrap();
        assert_eq!(parsed, Scalar::TimestampNtz(micros));
    }
}
#[test]
fn test_timestamp_parse_fails() {
    // (type, raw text) combinations that must be rejected.
    let cases = [
        (PrimitiveType::TimestampNtz, "1971-07-22T03:06:40.678910Z"),
        (PrimitiveType::TimestampNtz, "1971-07-22T03:06:40Z"),
        (PrimitiveType::TimestampNtz, "1971-07-22"),
        (PrimitiveType::Timestamp, "1971-07-22"),
    ];
    for (p_type, raw) in cases {
        let outcome = p_type.parse_scalar(raw);
        assert!(outcome.is_err());
    }
}
#[test]
fn test_partial_cmp() {
    let one = Scalar::Integer(1);
    let two = Scalar::Integer(2);
    let typed_null = Scalar::Null(DataType::INTEGER);

    // Same-variant values are ordered by their payload.
    assert_eq!(one.logical_partial_cmp(&two), Some(Ordering::Less));
    assert_eq!(two.logical_partial_cmp(&one), Some(Ordering::Greater));
    assert_eq!(one.logical_partial_cmp(&one), Some(Ordering::Equal));
    assert_eq!(two.logical_partial_cmp(&two), Some(Ordering::Equal));

    // Nulls are incomparable with everything, including themselves.
    assert_eq!(one.logical_partial_cmp(&typed_null), None);
    assert_eq!(typed_null.logical_partial_cmp(&one), None);
    let other_null = Scalar::Null(DataType::INTEGER);
    assert_eq!(other_null.logical_partial_cmp(&other_null), None);
}
#[test]
fn test_partial_eq() {
    let one = Scalar::Integer(1);
    let two = Scalar::Integer(2);
    let typed_null = Scalar::Null(DataType::INTEGER);

    assert!(!one.logical_eq(&two));
    assert!(one.logical_eq(&one));

    // Nulls are never logically equal to anything, including themselves.
    assert!(!one.logical_eq(&typed_null));
    assert!(!typed_null.logical_eq(&one));
    let other_null = Scalar::Null(DataType::INTEGER);
    assert!(!other_null.logical_eq(&other_null));
}
#[test]
fn test_hashmap_conversion() -> DeltaResult<()> {
    let source = HashMap::from([("key1".to_string(), 42i32), ("key2".to_string(), 100i32)]);
    let scalar = Scalar::try_from(source)?;
    assert!(matches!(scalar, Scalar::Map(_)));

    // Plain values produce a map type that disallows null values.
    let expected_type = MapType::new(DataType::STRING, DataType::INTEGER, false);
    assert_eq!(scalar.data_type(), DataType::from(expected_type));

    let Scalar::Map(map_data) = scalar else {
        panic!("Expected Map scalar");
    };
    assert!(!map_data.map_type().value_contains_null());

    let pairs = map_data.pairs();
    assert_eq!(pairs.len(), 2);
    // HashMap iteration order is unspecified, so check membership rather than position.
    let first = (Scalar::String("key1".to_string()), Scalar::Integer(42));
    let second = (Scalar::String("key2".to_string()), Scalar::Integer(100));
    assert!(pairs.contains(&first), "Missing key1 -> 42 pair");
    assert!(pairs.contains(&second), "Missing key2 -> 100 pair");
    Ok(())
}
#[test]
fn test_hashmap_conversion_with_nullable_values() -> DeltaResult<()> {
    let source = HashMap::from([
        ("key1".to_string(), Some(42i32)),
        ("key2".to_string(), None),
        ("key3".to_string(), Some(100i32)),
    ]);
    let scalar = Scalar::try_from(source)?;
    assert!(matches!(scalar, Scalar::Map(_)));

    // Optional values produce a map type that allows null values.
    let expected_type = MapType::new(DataType::STRING, DataType::INTEGER, true);
    assert_eq!(scalar.data_type(), DataType::from(expected_type));

    let Scalar::Map(map_data) = scalar else {
        panic!("Expected Map scalar");
    };
    assert!(map_data.map_type().value_contains_null());

    let pairs = map_data.pairs();
    assert_eq!(pairs.len(), 3);
    // HashMap iteration order is unspecified, so check membership rather than position.
    let first = (Scalar::String("key1".to_string()), Scalar::Integer(42));
    let second = (
        Scalar::String("key2".to_string()),
        Scalar::Null(DataType::INTEGER),
    );
    let third = (Scalar::String("key3".to_string()), Scalar::Integer(100));
    assert!(pairs.contains(&first), "Missing key1 -> 42 pair");
    assert!(pairs.contains(&second), "Missing key2 -> null pair");
    assert!(pairs.contains(&third), "Missing key3 -> 100 pair");
    Ok(())
}
#[test]
fn test_vec_conversion() -> DeltaResult<()> {
    let scalar = Scalar::try_from(vec![42i32, 100i32, 200i32])?;
    assert!(matches!(scalar, Scalar::Array(_)));

    // Plain values produce an array type that disallows null elements.
    let expected_type = ArrayType::new(DataType::INTEGER, false);
    assert_eq!(scalar.data_type(), DataType::from(expected_type));

    let Scalar::Array(array_data) = scalar else {
        panic!("Expected Array scalar");
    };
    assert!(!array_data.array_type().contains_null());

    let elements = array_data.array_elements();
    assert_eq!(elements.len(), 3);
    let expected = [42, 100, 200].map(Scalar::Integer);
    for (actual, wanted) in elements.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
    Ok(())
}
#[test]
fn test_vec_conversion_with_nullable_values() -> DeltaResult<()> {
    let scalar = Scalar::try_from(vec![Some(42i32), None, Some(100i32)])?;
    assert!(matches!(scalar, Scalar::Array(_)));

    // Optional values produce an array type that allows null elements.
    let expected_type = ArrayType::new(DataType::INTEGER, true);
    assert_eq!(scalar.data_type(), DataType::from(expected_type));

    let Scalar::Array(array_data) = scalar else {
        panic!("Expected Array scalar");
    };
    assert!(array_data.array_type().contains_null());

    let elements = array_data.array_elements();
    assert_eq!(elements.len(), 3);
    let (first, middle, last) = (&elements[0], &elements[1], &elements[2]);
    assert_eq!(*first, Scalar::Integer(42));
    assert!(middle.is_null());
    assert_eq!(*last, Scalar::Integer(100));
    Ok(())
}
#[test]
fn test_vec_conversion_different_types() -> DeltaResult<()> {
    // Strings map to a STRING element type.
    let strings = vec!["hello".to_string(), "world".to_string()];
    let Scalar::Array(string_array) = Scalar::try_from(strings)? else {
        panic!("Expected Array scalar");
    };
    assert_eq!(
        string_array.array_type(),
        &ArrayType::new(DataType::STRING, false)
    );

    // Booleans map to a BOOLEAN element type.
    let bools = vec![true, false, true];
    let Scalar::Array(bool_array) = Scalar::try_from(bools)? else {
        panic!("Expected Array scalar");
    };
    assert_eq!(
        bool_array.array_type(),
        &ArrayType::new(DataType::BOOLEAN, false)
    );
    Ok(())
}
#[test]
fn test_bytes_conversion() {
    // A non-empty buffer keeps its contents.
    let payload = bytes::Bytes::from(vec![1, 2, 3, 4, 5]);
    let scalar = Scalar::from(payload);
    assert!(matches!(scalar, Scalar::Binary(_)));
    assert_eq!(scalar.data_type(), DataType::BINARY);
    let Scalar::Binary(data) = scalar else {
        panic!("Expected Binary scalar");
    };
    assert_eq!(data, vec![1, 2, 3, 4, 5]);

    // An empty buffer still converts to a (empty) binary scalar.
    let empty_scalar = Scalar::from(bytes::Bytes::new());
    assert!(matches!(empty_scalar, Scalar::Binary(_)));
    let Scalar::Binary(data) = empty_scalar else {
        panic!("Expected Binary scalar");
    };
    assert!(data.is_empty());
}
}