use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
use super::error::{Error, Result};
use super::types::DataType;
use crate::common::{CompactArc, SmartString};
// strftime-style patterns that `parse_timestamp` tries, in this order, against
// the trimmed input; the first pattern that parses wins. Order therefore
// matters for ambiguous inputs (e.g. `%m/%d/%Y` is listed before `%d/%m/%Y`).
const TIMESTAMP_FORMATS: &[&str] = &[
"%Y-%m-%dT%H:%M:%S%.f%:z", "%Y-%m-%dT%H:%M:%S%:z", "%Y-%m-%dT%H:%M:%S%.fZ", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%.f", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S%.f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d %H:%M:%S", "%Y/%m/%d", "%m/%d/%Y", "%d/%m/%Y", ];
// Time-of-day patterns used by `parse_timestamp` as a last resort; a match is
// combined with the current UTC date to form a full timestamp.
const TIME_FORMATS: &[&str] = &[
"%H:%M:%S%.f", "%H:%M:%S", "%H:%M", ];
/// A dynamically typed scalar value.
///
/// Equality, hashing and ordering are implemented by hand further down in this
/// file rather than derived, so only `Debug` and `Clone` are derived here.
#[derive(Debug, Clone)]
pub enum Value {
/// A NULL that remembers the data type it stands in for.
Null(DataType),
/// 64-bit signed integer.
Integer(i64),
/// 64-bit floating point number.
Float(f64),
/// Text payload.
Text(SmartString),
/// Boolean flag.
Boolean(bool),
/// Instant in UTC.
Timestamp(DateTime<Utc>),
/// Tagged byte payload: the first byte is a `DataType` discriminant (see
/// `Value::json` / `Value::vector`), the remaining bytes are the data.
Extension(CompactArc<[u8]>),
}
/// A NULL value whose data type is `DataType::Null`.
pub const NULL_VALUE: Value = Value::Null(DataType::Null);
impl Value {
#[inline]
pub fn null(data_type: DataType) -> Self {
Value::Null(data_type)
}
#[inline(always)]
pub fn null_unknown() -> Self {
Value::Null(DataType::Null)
}
pub fn integer(value: i64) -> Self {
Value::Integer(value)
}
pub fn float(value: f64) -> Self {
Value::Float(value)
}
pub fn text(value: impl Into<String>) -> Self {
Value::Text(SmartString::from_string_shared(value.into()))
}
pub fn text_arc(value: Arc<str>) -> Self {
Value::Text(SmartString::from(value))
}
pub fn boolean(value: bool) -> Self {
Value::Boolean(value)
}
pub fn timestamp(value: DateTime<Utc>) -> Self {
Value::Timestamp(value)
}
pub fn json(value: impl Into<String>) -> Self {
let s_bytes = value.into().into_bytes();
let mut bytes = Vec::with_capacity(1 + s_bytes.len());
bytes.push(DataType::Json as u8);
bytes.extend_from_slice(&s_bytes);
Value::Extension(CompactArc::from(bytes))
}
pub fn vector(data: Vec<f32>) -> Self {
let mut bytes = Vec::with_capacity(1 + data.len() * 4);
bytes.push(DataType::Vector as u8);
for f in &data {
bytes.extend_from_slice(&f.to_le_bytes());
}
Value::Extension(CompactArc::from(bytes))
}
pub fn vector_from_bytes(raw_f32_bytes: CompactArc<[u8]>) -> Self {
let mut bytes = Vec::with_capacity(1 + raw_f32_bytes.len());
bytes.push(DataType::Vector as u8);
bytes.extend_from_slice(&raw_f32_bytes);
Value::Extension(CompactArc::from(bytes))
}
/// Returns the data type of this value.
///
/// NULLs report the type they carry. Extension values decode their tag byte
/// with `DataType::from_u8`; an empty payload or an unrecognised tag yields
/// `DataType::Null`.
pub fn data_type(&self) -> DataType {
    match self {
        Value::Null(carried) => *carried,
        Value::Integer(_) => DataType::Integer,
        Value::Float(_) => DataType::Float,
        Value::Text(_) => DataType::Text,
        Value::Boolean(_) => DataType::Boolean,
        Value::Timestamp(_) => DataType::Timestamp,
        Value::Extension(bytes) => match bytes.first() {
            Some(&tag) => DataType::from_u8(tag).unwrap_or(DataType::Null),
            None => DataType::Null,
        },
    }
}
#[inline(always)]
pub fn is_null(&self) -> bool {
matches!(self, Value::Null(_))
}
/// Best-effort conversion to `i64`.
///
/// * Floats are converted with an `as` cast (fraction dropped).
/// * Text is parsed as an integer first, then as a float that is cast.
/// * Booleans map to 1 / 0.
/// * Timestamps yield nanoseconds since the Unix epoch, or 0 when chrono
///   cannot represent the instant in nanoseconds.
/// * NULL and extension values yield `None`.
pub fn as_int64(&self) -> Option<i64> {
    match self {
        Value::Integer(n) => Some(*n),
        Value::Float(x) => Some(*x as i64),
        Value::Boolean(flag) => Some(i64::from(*flag)),
        Value::Text(text) => match text.parse::<i64>() {
            Ok(n) => Some(n),
            Err(_) => text.parse::<f64>().ok().map(|x| x as i64),
        },
        Value::Timestamp(instant) => Some(instant.timestamp_nanos_opt().unwrap_or(0)),
        Value::Null(_) | Value::Extension(_) => None,
    }
}
/// Best-effort conversion to `f64`.
///
/// Integers are cast, text is parsed, booleans map to 1.0 / 0.0. NULL,
/// timestamps and extension values yield `None`.
pub fn as_float64(&self) -> Option<f64> {
    match self {
        Value::Float(x) => Some(*x),
        Value::Integer(n) => Some(*n as f64),
        Value::Boolean(flag) => Some(f64::from(u8::from(*flag))),
        Value::Text(text) => text.parse::<f64>().ok(),
        Value::Null(_) | Value::Timestamp(_) | Value::Extension(_) => None,
    }
}
/// Best-effort conversion to `bool`.
///
/// Numbers are `true` when non-zero. Text accepts (case-insensitively)
/// `true`/`t`/`yes`/`y` and `false`/`f`/`no`/`n`, plus the exact strings
/// `"1"`, `"0"` and the empty string (false); any other text is parsed as a
/// float and compared against zero. NULL, timestamps and extension values
/// yield `None`.
pub fn as_boolean(&self) -> Option<bool> {
    const TRUE_WORDS: [&str; 4] = ["true", "t", "yes", "y"];
    const FALSE_WORDS: [&str; 4] = ["false", "f", "no", "n"];

    match self {
        Value::Boolean(flag) => Some(*flag),
        Value::Integer(n) => Some(*n != 0),
        Value::Float(x) => Some(*x != 0.0),
        Value::Text(raw) => {
            let text: &str = raw.as_ref();
            let is_true =
                TRUE_WORDS.iter().any(|w| text.eq_ignore_ascii_case(w)) || text == "1";
            if is_true {
                return Some(true);
            }
            let is_false = FALSE_WORDS.iter().any(|w| text.eq_ignore_ascii_case(w))
                || text == "0"
                || text.is_empty();
            if is_false {
                return Some(false);
            }
            // Anything else counts as true only if it is a non-zero number.
            text.parse::<f64>().ok().map(|number| number != 0.0)
        }
        Value::Null(_) | Value::Timestamp(_) | Value::Extension(_) => None,
    }
}
/// Renders this value as an owned string, or `None` for NULL.
///
/// Floats go through `format_float`, timestamps are RFC 3339. JSON
/// extensions return their text (empty string if the bytes are not UTF-8),
/// vector extensions are rendered by `format_vector_bytes`, and any other
/// extension returns its payload only if it is non-empty valid UTF-8.
pub fn as_string(&self) -> Option<String> {
    let rendered = match self {
        Value::Null(_) => return None,
        Value::Integer(n) => n.to_string(),
        Value::Float(x) => format_float(*x),
        Value::Text(text) => text.to_string(),
        Value::Boolean(flag) => String::from(if *flag { "true" } else { "false" }),
        Value::Timestamp(instant) => instant.to_rfc3339(),
        Value::Extension(bytes) => {
            // An empty payload has no tag and therefore no string form.
            let (&tag, payload) = bytes.split_first()?;
            if tag == DataType::Json as u8 {
                std::str::from_utf8(payload).unwrap_or("").to_string()
            } else if tag == DataType::Vector as u8 {
                format_vector_bytes(payload)
            } else if payload.is_empty() {
                return None;
            } else {
                return std::str::from_utf8(payload).ok().map(str::to_string);
            }
        }
    };
    Some(rendered)
}
/// Borrows the textual content of a text or JSON value.
///
/// JSON payloads that are not valid UTF-8 are reported as the empty string.
/// Every other variant yields `None`.
pub fn as_str(&self) -> Option<&str> {
    let json_tag = DataType::Json as u8;
    match self {
        Value::Text(text) => Some(text.as_str()),
        Value::Extension(bytes) => match bytes.split_first() {
            Some((&tag, payload)) if tag == json_tag => {
                Some(std::str::from_utf8(payload).unwrap_or(""))
            }
            _ => None,
        },
        _ => None,
    }
}
/// Best-effort conversion to a UTC timestamp.
///
/// * Timestamps are returned as-is.
/// * Text is parsed with `parse_timestamp`.
/// * Integers are interpreted as nanoseconds since the Unix epoch (the
///   inverse of what `as_int64` produces for timestamps).
/// * Everything else, including NULL, yields `None`.
pub fn as_timestamp(&self) -> Option<DateTime<Utc>> {
    match self {
        Value::Null(_) => None,
        Value::Timestamp(t) => Some(*t),
        Value::Text(s) => parse_timestamp(s).ok(),
        Value::Integer(nanos) => {
            // Euclidean division keeps the sub-second part in 0..1e9 for
            // negative (pre-1970) inputs. Truncating `/` and `%` would give a
            // negative remainder that wraps when cast to `u32`, making
            // `from_timestamp` reject every pre-epoch instant.
            let secs = nanos.div_euclid(1_000_000_000);
            let subsec_nanos = nanos.rem_euclid(1_000_000_000) as u32;
            DateTime::from_timestamp(secs, subsec_nanos)
        }
        _ => None,
    }
}
/// Borrows the JSON text of this value.
///
/// NULL is reported as the literal `"{}"`. JSON payloads that are not valid
/// UTF-8 are reported as the empty string. Non-JSON values yield `None`.
pub fn as_json(&self) -> Option<&str> {
    let json_tag = DataType::Json as u8;
    match self {
        Value::Null(_) => Some("{}"),
        Value::Extension(bytes) => match bytes.split_first() {
            Some((&tag, payload)) if tag == json_tag => {
                Some(std::str::from_utf8(payload).unwrap_or(""))
            }
            _ => None,
        },
        _ => None,
    }
}
/// Decodes a vector extension value into its `f32` components.
///
/// The payload after the tag byte is read as consecutive 4-byte
/// little-endian floats; trailing bytes that do not fill a whole float are
/// ignored. Non-vector values yield `None`.
pub fn as_vector_f32(&self) -> Option<Vec<f32>> {
    let vector_tag = DataType::Vector as u8;
    match self {
        Value::Extension(bytes) if bytes.first() == Some(&vector_tag) => {
            let components = bytes[1..]
                .chunks_exact(4)
                .map(|raw| f32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]))
                .collect();
            Some(components)
        }
        _ => None,
    }
}
pub fn compare(&self, other: &Value) -> Result<Ordering> {
if self.is_null() || other.is_null() {
if self.is_null() && other.is_null() {
return Ok(Ordering::Equal);
}
return Err(Error::NullComparison);
}
if self.data_type() == other.data_type() {
return self.compare_same_type(other);
}
if self.data_type().is_numeric() && other.data_type().is_numeric() {
let v1 = self.as_float64().unwrap();
let v2 = other.as_float64().unwrap();
return Ok(compare_floats(v1, v2));
}
match (self, other) {
(Value::Timestamp(ts), Value::Text(s)) => {
if let Ok(parsed) = parse_timestamp(s) {
return Ok(ts.cmp(&parsed));
}
}
(Value::Timestamp(ts), Value::Extension(data))
if data.first() == Some(&(DataType::Json as u8)) =>
{
let s = std::str::from_utf8(&data[1..]).unwrap_or("");
if let Ok(parsed) = parse_timestamp(s) {
return Ok(ts.cmp(&parsed));
}
}
(Value::Text(s), Value::Timestamp(ts)) => {
if let Ok(parsed) = parse_timestamp(s) {
return Ok(parsed.cmp(ts));
}
}
(Value::Extension(data), Value::Timestamp(ts))
if data.first() == Some(&(DataType::Json as u8)) =>
{
let s = std::str::from_utf8(&data[1..]).unwrap_or("");
if let Ok(parsed) = parse_timestamp(s) {
return Ok(parsed.cmp(ts));
}
}
_ => {}
}
let s1 = self.as_string().unwrap_or_default();
let s2 = other.as_string().unwrap_or_default();
Ok(s1.cmp(&s2))
}
fn compare_same_type(&self, other: &Value) -> Result<Ordering> {
match (self, other) {
(Value::Integer(a), Value::Integer(b)) => Ok(a.cmp(b)),
(Value::Float(a), Value::Float(b)) => Ok(compare_floats(*a, *b)),
(Value::Text(a), Value::Text(b)) => Ok(a.cmp(b)),
(Value::Boolean(a), Value::Boolean(b)) => Ok(a.cmp(b)),
(Value::Timestamp(a), Value::Timestamp(b)) => Ok(a.cmp(b)),
(Value::Extension(a), Value::Extension(b)) => {
if a.first() != b.first() {
return Err(Error::IncomparableTypes);
}
if a == b {
Ok(Ordering::Equal)
} else {
Err(Error::IncomparableTypes)
}
}
_ => Err(Error::IncomparableTypes),
}
}
pub fn from_typed(value: Option<&dyn std::any::Any>, data_type: DataType) -> Self {
match value {
None => Value::Null(data_type),
Some(v) => {
match data_type {
DataType::Integer => {
if let Some(&i) = v.downcast_ref::<i64>() {
Value::Integer(i)
} else if let Some(&i) = v.downcast_ref::<i32>() {
Value::Integer(i as i64)
} else if let Some(s) = v.downcast_ref::<String>() {
s.parse::<i64>()
.map(Value::Integer)
.unwrap_or(Value::Null(data_type))
} else {
Value::Null(data_type)
}
}
DataType::Float => {
if let Some(&f) = v.downcast_ref::<f64>() {
Value::Float(f)
} else if let Some(&i) = v.downcast_ref::<i64>() {
Value::Float(i as f64)
} else if let Some(s) = v.downcast_ref::<String>() {
s.parse::<f64>()
.map(Value::Float)
.unwrap_or(Value::Null(data_type))
} else {
Value::Null(data_type)
}
}
DataType::Text => {
if let Some(s) = v.downcast_ref::<String>() {
Value::Text(SmartString::new(s))
} else if let Some(&s) = v.downcast_ref::<&str>() {
Value::Text(SmartString::from(s))
} else {
Value::Null(data_type)
}
}
DataType::Boolean => {
if let Some(&b) = v.downcast_ref::<bool>() {
Value::Boolean(b)
} else if let Some(&i) = v.downcast_ref::<i64>() {
Value::Boolean(i != 0)
} else {
Value::Null(data_type)
}
}
DataType::Timestamp => {
if let Some(&t) = v.downcast_ref::<DateTime<Utc>>() {
Value::Timestamp(t)
} else if let Some(s) = v.downcast_ref::<String>() {
parse_timestamp(s)
.map(Value::Timestamp)
.unwrap_or(Value::Null(data_type))
} else {
Value::Null(data_type)
}
}
DataType::Json => {
if let Some(s) = v.downcast_ref::<String>() {
if serde_json::from_str::<serde_json::Value>(s).is_ok() {
Value::json(s)
} else {
Value::Null(data_type)
}
} else {
Value::Null(data_type)
}
}
DataType::Vector => {
if let Some(vec) = v.downcast_ref::<Vec<f32>>() {
Value::vector(vec.clone())
} else {
Value::Null(data_type)
}
}
DataType::Null => Value::Null(DataType::Null),
}
}
}
}
/// Converts this value to `target_type`, returning a new value.
///
/// NULL input yields a NULL of `target_type`; a value that already has the
/// target type is cloned. Any conversion that is unsupported or fails (for
/// example unparsable text) yields a NULL of `target_type`.
///
/// Integers converted to timestamps are interpreted as nanoseconds since
/// the Unix epoch, including negative (pre-1970) values.
pub fn coerce_to_type(&self, target_type: DataType) -> Value {
    if self.is_null() {
        return Value::Null(target_type);
    }
    if self.data_type() == target_type {
        return self.clone();
    }
    match target_type {
        DataType::Integer => match self {
            Value::Integer(v) => Value::Integer(*v),
            Value::Float(v) => Value::Integer(*v as i64),
            Value::Text(s) => s
                .parse::<i64>()
                .map(Value::Integer)
                .unwrap_or(Value::Null(target_type)),
            Value::Boolean(b) => Value::Integer(if *b { 1 } else { 0 }),
            _ => Value::Null(target_type),
        },
        DataType::Float => match self {
            Value::Float(v) => Value::Float(*v),
            Value::Integer(v) => Value::Float(*v as f64),
            Value::Text(s) => s
                .parse::<f64>()
                .map(Value::Float)
                .unwrap_or(Value::Null(target_type)),
            Value::Boolean(b) => Value::Float(if *b { 1.0 } else { 0.0 }),
            _ => Value::Null(target_type),
        },
        DataType::Text => match self {
            Value::Text(s) => Value::Text(s.clone()),
            Value::Integer(v) => Value::Text(SmartString::from_string(v.to_string())),
            Value::Float(v) => Value::Text(SmartString::from_string(format_float(*v))),
            Value::Boolean(b) => {
                Value::Text(SmartString::new(if *b { "true" } else { "false" }))
            }
            Value::Timestamp(t) => Value::Text(SmartString::from_string(t.to_rfc3339())),
            Value::Extension(data) if data.first() == Some(&(DataType::Json as u8)) => {
                // Non-UTF-8 JSON payloads degrade to the empty string.
                Value::Text(SmartString::new(
                    std::str::from_utf8(&data[1..]).unwrap_or(""),
                ))
            }
            Value::Extension(data) if data.first() == Some(&(DataType::Vector as u8)) => {
                Value::Text(SmartString::from_string(format_vector_bytes(&data[1..])))
            }
            Value::Extension(_) => Value::Null(target_type),
            Value::Null(_) => Value::Null(target_type),
        },
        DataType::Boolean => match self {
            Value::Boolean(b) => Value::Boolean(*b),
            Value::Integer(v) => Value::Boolean(*v != 0),
            Value::Float(v) => Value::Boolean(*v != 0.0),
            Value::Text(s) => {
                // Only the recognised spellings convert; other text is NULL.
                let s_ref: &str = s.as_ref();
                if s_ref.eq_ignore_ascii_case("true")
                    || s_ref.eq_ignore_ascii_case("t")
                    || s_ref.eq_ignore_ascii_case("yes")
                    || s_ref.eq_ignore_ascii_case("y")
                    || s_ref == "1"
                {
                    Value::Boolean(true)
                } else if s_ref.eq_ignore_ascii_case("false")
                    || s_ref.eq_ignore_ascii_case("f")
                    || s_ref.eq_ignore_ascii_case("no")
                    || s_ref.eq_ignore_ascii_case("n")
                    || s_ref == "0"
                {
                    Value::Boolean(false)
                } else {
                    Value::Null(target_type)
                }
            }
            _ => Value::Null(target_type),
        },
        DataType::Timestamp => match self {
            Value::Timestamp(t) => Value::Timestamp(*t),
            Value::Text(s) => parse_timestamp(s)
                .map(Value::Timestamp)
                .unwrap_or(Value::Null(target_type)),
            Value::Integer(nanos) => {
                // Euclidean division keeps the sub-second part in 0..1e9 for
                // negative inputs; truncating `%` would produce a negative
                // remainder that wraps when cast to `u32` and turns every
                // pre-epoch instant into NULL.
                let secs = nanos.div_euclid(1_000_000_000);
                let subsec_nanos = nanos.rem_euclid(1_000_000_000) as u32;
                DateTime::from_timestamp(secs, subsec_nanos)
                    .map(Value::Timestamp)
                    .unwrap_or(Value::Null(target_type))
            }
            _ => Value::Null(target_type),
        },
        DataType::Json => match self {
            Value::Extension(data) if data.first() == Some(&(DataType::Json as u8)) => {
                self.clone()
            }
            Value::Text(s) => {
                // Text must be valid JSON to be accepted.
                if serde_json::from_str::<serde_json::Value>(s.as_str()).is_ok() {
                    Value::json(s.as_str())
                } else {
                    Value::Null(target_type)
                }
            }
            Value::Integer(v) => Value::json(v.to_string()),
            Value::Float(v) => Value::json(format_float(*v)),
            Value::Boolean(b) => Value::json(if *b { "true" } else { "false" }),
            _ => Value::Null(target_type),
        },
        DataType::Vector => match self {
            Value::Extension(data) if data.first() == Some(&(DataType::Vector as u8)) => {
                self.clone()
            }
            Value::Text(s) => match parse_vector_str(s.as_str()) {
                Some(floats) => Value::vector(floats),
                None => Value::Null(target_type),
            },
            _ => Value::Null(target_type),
        },
        DataType::Null => Value::Null(DataType::Null),
    }
}
#[inline]
pub fn into_coerce_to_type(self, target_type: DataType) -> Value {
if self.is_null() {
return Value::Null(target_type);
}
if self.data_type() == target_type {
return self;
}
match target_type {
DataType::Integer => match &self {
Value::Integer(v) => Value::Integer(*v),
Value::Float(v) => Value::Integer(*v as i64),
Value::Text(s) => s
.parse::<i64>()
.map(Value::Integer)
.unwrap_or(Value::Null(target_type)),
Value::Boolean(b) => Value::Integer(if *b { 1 } else { 0 }),
_ => Value::Null(target_type),
},
DataType::Float => match &self {
Value::Float(v) => Value::Float(*v),
Value::Integer(v) => Value::Float(*v as f64),
Value::Text(s) => s
.parse::<f64>()
.map(Value::Float)
.unwrap_or(Value::Null(target_type)),
Value::Boolean(b) => Value::Float(if *b { 1.0 } else { 0.0 }),
_ => Value::Null(target_type),
},
DataType::Text => match self {
Value::Text(s) => Value::Text(s),
Value::Integer(v) => Value::Text(SmartString::from_string(v.to_string())),
Value::Float(v) => Value::Text(SmartString::from_string(format_float(v))),
Value::Boolean(b) => {
Value::Text(SmartString::new(if b { "true" } else { "false" }))
}
Value::Timestamp(t) => Value::Text(SmartString::from_string(t.to_rfc3339())),
Value::Extension(data) if data.first() == Some(&(DataType::Json as u8)) => {
Value::Text(SmartString::new(
std::str::from_utf8(&data[1..]).unwrap_or(""),
))
}
Value::Extension(data) if data.first() == Some(&(DataType::Vector as u8)) => {
Value::Text(SmartString::from_string(format_vector_bytes(&data[1..])))
}
Value::Extension(_) | Value::Null(_) => Value::Null(target_type),
},
DataType::Boolean => match &self {
Value::Boolean(b) => Value::Boolean(*b),
Value::Integer(v) => Value::Boolean(*v != 0),
Value::Float(v) => Value::Boolean(*v != 0.0),
Value::Text(s) => {
let s_ref: &str = s.as_ref();
if s_ref.eq_ignore_ascii_case("true")
|| s_ref.eq_ignore_ascii_case("t")
|| s_ref.eq_ignore_ascii_case("yes")
|| s_ref.eq_ignore_ascii_case("y")
|| s_ref == "1"
{
Value::Boolean(true)
} else if s_ref.eq_ignore_ascii_case("false")
|| s_ref.eq_ignore_ascii_case("f")
|| s_ref.eq_ignore_ascii_case("no")
|| s_ref.eq_ignore_ascii_case("n")
|| s_ref == "0"
{
Value::Boolean(false)
} else {
Value::Null(target_type)
}
}
_ => Value::Null(target_type),
},
DataType::Timestamp => match self {
Value::Timestamp(t) => Value::Timestamp(t),
Value::Text(s) => parse_timestamp(&s)
.map(Value::Timestamp)
.unwrap_or(Value::Null(target_type)),
Value::Integer(nanos) => {
DateTime::from_timestamp(nanos / 1_000_000_000, (nanos % 1_000_000_000) as u32)
.map(Value::Timestamp)
.unwrap_or(Value::Null(target_type))
}
_ => Value::Null(target_type),
},
DataType::Json => match self {
Value::Extension(ref data) if data.first() == Some(&(DataType::Json as u8)) => self,
Value::Text(s) => {
if serde_json::from_str::<serde_json::Value>(s.as_str()).is_ok() {
Value::json(s.as_str())
} else {
Value::Null(target_type)
}
}
Value::Integer(v) => Value::json(v.to_string()),
Value::Float(v) => Value::json(format_float(v)),
Value::Boolean(b) => Value::json(if b { "true" } else { "false" }),
_ => Value::Null(target_type),
},
DataType::Vector => match self {
Value::Extension(ref data) if data.first() == Some(&(DataType::Vector as u8)) => {
self
}
Value::Text(s) => {
if let Some(floats) = parse_vector_str(s.as_str()) {
Value::vector(floats)
} else {
Value::Null(target_type)
}
}
_ => Value::Null(target_type),
},
DataType::Null => Value::Null(DataType::Null),
}
}
}
/// The default value is a NULL tagged with `DataType::Null`.
impl Default for Value {
    fn default() -> Self {
        Self::Null(DataType::Null)
    }
}
/// Human-readable rendering: `NULL` for nulls, `format_float` for floats,
/// RFC 3339 for timestamps, the raw text for JSON, `format_vector_bytes` for
/// vectors and `<extension:TAG>` for any other extension payload.
impl fmt::Display for Value {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Value::Null(_) => f.write_str("NULL"),
            Value::Integer(n) => write!(f, "{}", n),
            Value::Float(x) => f.write_str(&format_float(*x)),
            Value::Text(text) => write!(f, "{}", text),
            Value::Boolean(flag) => f.write_str(if *flag { "true" } else { "false" }),
            Value::Timestamp(instant) => f.write_str(&instant.to_rfc3339()),
            Value::Extension(bytes) => {
                // An empty payload is reported with tag 0.
                let tag = bytes.first().map_or(0, |byte| *byte);
                match tag {
                    t if t == DataType::Json as u8 => {
                        f.write_str(std::str::from_utf8(&bytes[1..]).unwrap_or(""))
                    }
                    t if t == DataType::Vector as u8 => {
                        f.write_str(&format_vector_bytes(&bytes[1..]))
                    }
                    unknown => write!(f, "<extension:{}>", unknown),
                }
            }
        }
    }
}
/// Equality used for hashing and grouping.
///
/// All NULLs are equal to each other regardless of carried type, NaN equals
/// NaN, and an integer equals a float when the integer cast to `f64` matches.
/// Values of otherwise different variants are never equal.
impl PartialEq for Value {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            // A NULL only ever equals another NULL.
            (Value::Null(_), rhs) => rhs.is_null(),
            (_, Value::Null(_)) => false,
            (Value::Integer(lhs), Value::Integer(rhs)) => lhs == rhs,
            (Value::Float(lhs), Value::Float(rhs)) => {
                lhs == rhs || (lhs.is_nan() && rhs.is_nan())
            }
            (Value::Integer(int), Value::Float(float))
            | (Value::Float(float), Value::Integer(int)) => *float == (*int as f64),
            (Value::Text(lhs), Value::Text(rhs)) => lhs == rhs,
            (Value::Boolean(lhs), Value::Boolean(rhs)) => lhs == rhs,
            (Value::Timestamp(lhs), Value::Timestamp(rhs)) => lhs == rhs,
            (Value::Extension(lhs), Value::Extension(rhs)) => lhs == rhs,
            _ => false,
        }
    }
}
// `Eq` is claimed even though `Value` contains an `f64`: the hand-written
// `PartialEq` above treats NaN as equal to NaN, so `v == v` holds for floats.
impl Eq for Value {}
// Bounds of the symmetric range [-(2^53 - 1), 2^53 - 1]. The `Hash` impl hashes
// integers (and whole-number floats) inside this range by their integer value
// and switches to the `f64` bit pattern outside it.
const I64_SAFE_MAX: i64 = (1_i64 << 53) - 1;
const I64_SAFE_MIN: i64 = -I64_SAFE_MAX;
/// Multiplies `a` and `b` as a full 128-bit product and folds the result to
/// 64 bits by XOR-ing its low and high halves.
#[inline(always)]
fn wymix(a: u64, b: u64) -> u64 {
    // The product of two u64 values always fits in a u128.
    let product = u128::from(a) * u128::from(b);
    let low = product as u64;
    let high = (product >> 64) as u64;
    low ^ high
}
// Multiplier constants fed to `wymix` by the `Hash` impl below.
// NOTE(review): these look like the wyhash secret constants — confirm.
const WY_P1: u64 = 0xa0761d6478bd642f;
const WY_P2: u64 = 0xe7037ed1a0b428db;
/// Hashing consistent with the hand-written `PartialEq`.
///
/// Each value is reduced to a single `u64` that is written to the hasher:
/// * all NULLs hash alike;
/// * an integer and a whole-number float that compare equal hash alike
///   (integer value inside the safe range, `f64` bit pattern outside it);
/// * all NaNs hash alike;
/// * text and extension payloads are mixed 8 bytes at a time with a
///   variant-specific seed.
impl Hash for Value {
    #[inline(always)]
    fn hash<H: Hasher>(&self, state: &mut H) {
        /// Mixes `bytes` into a 64-bit digest seeded with `seed` and the
        /// length. Whole 8-byte words are read in native byte order and mixed
        /// with `WY_P2`; the remaining 1..=7 bytes are packed little-endian
        /// into one word and mixed with `WY_P1`.
        #[inline]
        fn mix_bytes(seed: u64, bytes: &[u8]) -> u64 {
            let mut h = wymix(seed ^ (bytes.len() as u64), WY_P1);
            let mut words = bytes.chunks_exact(8);
            for word in &mut words {
                let mut buf = [0u8; 8];
                buf.copy_from_slice(word);
                h = wymix(h ^ u64::from_ne_bytes(buf), WY_P2);
            }
            let tail_bytes = words.remainder();
            if !tail_bytes.is_empty() {
                let mut tail = 0u64;
                for (j, &b) in tail_bytes.iter().enumerate() {
                    tail |= (b as u64) << (j * 8);
                }
                h = wymix(h ^ tail, WY_P1);
            }
            h
        }

        match self {
            Value::Null(_) => {
                state.write_u64(0);
            }
            Value::Integer(v) => match *v {
                I64_SAFE_MIN..=I64_SAFE_MAX => {
                    state.write_u64(wymix(1 ^ (*v as u64), WY_P1));
                }
                // Outside the safe range, hash the f64 image so that an equal
                // float lands on the same hash.
                _ => {
                    state.write_u64(wymix(1 ^ (*v as f64).to_bits(), WY_P1));
                }
            },
            Value::Float(v) => {
                if v.is_nan() {
                    // One canonical bit pattern for every NaN.
                    state.write_u64(wymix(6 ^ f64::NAN.to_bits(), WY_P1));
                } else if v.fract() == 0.0 {
                    // Whole numbers must hash like the equal integer.
                    match *v as i64 {
                        i @ I64_SAFE_MIN..=I64_SAFE_MAX => {
                            state.write_u64(wymix(1 ^ (i as u64), WY_P1));
                        }
                        _ => {
                            state.write_u64(wymix(1 ^ v.to_bits(), WY_P1));
                        }
                    }
                } else {
                    state.write_u64(wymix(6 ^ v.to_bits(), WY_P1));
                }
            }
            Value::Text(s) => {
                state.write_u64(mix_bytes(2, s.as_bytes()));
            }
            Value::Boolean(b) => {
                state.write_u64(wymix(if *b { 5 } else { 4 }, WY_P1));
            }
            Value::Timestamp(t) => {
                // Fall back to saturated whole seconds when the instant does
                // not fit in i64 nanoseconds.
                let nanos = t
                    .timestamp_nanos_opt()
                    .unwrap_or_else(|| t.timestamp().saturating_mul(1_000_000_000));
                state.write_u64(wymix(3 ^ (nanos as u64), WY_P1));
            }
            Value::Extension(data) => {
                let bytes: &[u8] = data;
                state.write_u64(mix_bytes(10, bytes));
            }
        }
    }
}
// `partial_cmp` deliberately follows `Value::compare` (which can fail, e.g.
// for NULL against non-NULL) instead of forwarding to the total `Ord::cmp`,
// hence the lint exemption.
#[allow(clippy::non_canonical_partial_ord_impl)]
impl PartialOrd for Value {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match self.compare(other) {
            Ok(ordering) => Some(ordering),
            Err(_) => None,
        }
    }
}
impl Ord for Value {
fn cmp(&self, other: &Self) -> Ordering {
match (self.is_null(), other.is_null()) {
(true, true) => return Ordering::Equal,
(true, false) => return Ordering::Less,
(false, true) => return Ordering::Greater,
(false, false) => {} }
match (self, other) {
(Value::Integer(i), Value::Float(f)) => {
let i_as_f64 = *i as f64;
if f.is_nan() {
return Ordering::Less; }
return i_as_f64.partial_cmp(f).unwrap_or(Ordering::Equal);
}
(Value::Float(f), Value::Integer(i)) => {
let i_as_f64 = *i as f64;
if f.is_nan() {
return Ordering::Greater; }
return f.partial_cmp(&i_as_f64).unwrap_or(Ordering::Equal);
}
_ => {} }
fn type_discriminant(v: &Value) -> u8 {
match v {
Value::Null(_) => 0,
Value::Boolean(_) => 1,
Value::Integer(_) | Value::Float(_) => 2,
Value::Text(_) => 3,
Value::Timestamp(_) => 4,
Value::Extension(_) => 5,
}
}
let self_disc = type_discriminant(self);
let other_disc = type_discriminant(other);
if self_disc != other_disc {
return self_disc.cmp(&other_disc);
}
match (self, other) {
(Value::Integer(a), Value::Integer(b)) => a.cmp(b),
(Value::Float(a), Value::Float(b)) => {
match (a.is_nan(), b.is_nan()) {
(true, true) => Ordering::Equal,
(true, false) => Ordering::Greater,
(false, true) => Ordering::Less,
(false, false) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
}
}
(Value::Text(a), Value::Text(b)) => a.cmp(b),
(Value::Boolean(a), Value::Boolean(b)) => a.cmp(b),
(Value::Timestamp(a), Value::Timestamp(b)) => a.cmp(b),
(Value::Extension(a), Value::Extension(b)) => a.cmp(b),
_ => Ordering::Equal, }
}
}
impl From<i64> for Value {
fn from(v: i64) -> Self {
Value::Integer(v)
}
}
impl From<i32> for Value {
fn from(v: i32) -> Self {
Value::Integer(v as i64)
}
}
impl From<i16> for Value {
fn from(v: i16) -> Self {
Value::Integer(v as i64)
}
}
impl From<i8> for Value {
fn from(v: i8) -> Self {
Value::Integer(v as i64)
}
}
impl From<u32> for Value {
fn from(v: u32) -> Self {
Value::Integer(v as i64)
}
}
impl From<u16> for Value {
fn from(v: u16) -> Self {
Value::Integer(v as i64)
}
}
impl From<u8> for Value {
fn from(v: u8) -> Self {
Value::Integer(v as i64)
}
}
impl From<f64> for Value {
fn from(v: f64) -> Self {
Value::Float(v)
}
}
impl From<f32> for Value {
fn from(v: f32) -> Self {
Value::Float(v as f64)
}
}
/// An owned `String` becomes a text value via `SmartString::from_string`.
impl From<String> for Value {
    fn from(v: String) -> Self {
        let text = SmartString::from_string(v);
        Self::Text(text)
    }
}
/// A string slice becomes a text value via `SmartString`'s `From<&str>`.
impl From<&str> for Value {
    fn from(v: &str) -> Self {
        let text = SmartString::from(v);
        Self::Text(text)
    }
}
/// An `Arc<str>` becomes a text value built from its borrowed string slice.
impl From<Arc<str>> for Value {
    fn from(v: Arc<str>) -> Self {
        let borrowed: &str = v.as_ref();
        Self::Text(SmartString::from(borrowed))
    }
}
impl From<bool> for Value {
fn from(v: bool) -> Self {
Value::Boolean(v)
}
}
impl From<DateTime<Utc>> for Value {
fn from(v: DateTime<Utc>) -> Self {
Value::Timestamp(v)
}
}
/// `Some(x)` converts like `x`; `None` becomes a NULL tagged `DataType::Null`.
impl<T: Into<Value>> From<Option<T>> for Value {
    fn from(v: Option<T>) -> Self {
        if let Some(inner) = v {
            inner.into()
        } else {
            Value::Null(DataType::Null)
        }
    }
}
/// Parses a timestamp from text, returning it in UTC.
///
/// The trimmed input is tried against, in order:
/// 1. every pattern in `TIMESTAMP_FORMATS`, first as an offset-aware
///    date-time (converted to UTC) and then as a naive date-time (taken to
///    be UTC);
/// 2. the date-only patterns below, yielding midnight UTC of that date;
/// 3. every pattern in `TIME_FORMATS`, combined with today's UTC date.
///
/// # Errors
///
/// Returns a parse error when no pattern matches.
pub fn parse_timestamp(s: &str) -> Result<DateTime<Utc>> {
    // The date-time parsers used in step 1 reject inputs that carry no time
    // of day, so the date-only patterns need a dedicated `NaiveDate` pass.
    // Order matters: `%m/%d/%Y` wins over `%d/%m/%Y` for ambiguous input,
    // mirroring their order in `TIMESTAMP_FORMATS`.
    const DATE_ONLY_FORMATS: &[&str] = &["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y", "%d/%m/%Y"];

    let s = s.trim();
    for format in TIMESTAMP_FORMATS {
        if let Ok(dt) = DateTime::parse_from_str(s, format) {
            return Ok(dt.with_timezone(&Utc));
        }
        if let Ok(ndt) = NaiveDateTime::parse_from_str(s, format) {
            return Ok(Utc.from_utc_datetime(&ndt));
        }
    }
    for format in DATE_ONLY_FORMATS {
        if let Ok(date) = NaiveDate::parse_from_str(s, format) {
            let midnight = date
                .and_hms_opt(0, 0, 0)
                .expect("00:00:00 is always a valid time of day");
            return Ok(Utc.from_utc_datetime(&midnight));
        }
    }
    for format in TIME_FORMATS {
        if let Ok(time) = NaiveTime::parse_from_str(s, format) {
            // A bare time of day is anchored to the current UTC date.
            let today = Utc::now().date_naive();
            let datetime = today.and_time(time);
            return Ok(Utc.from_utc_datetime(&datetime));
        }
    }
    Err(Error::parse(format!("invalid timestamp format: {}", s)))
}
/// Renders a float for display.
///
/// * NaN and the infinities become `NaN`, `Infinity` and `-Infinity`.
/// * Non-zero magnitudes below 1e-4 or at/above 1e15 use scientific notation
///   with trailing zeros removed from the mantissa.
/// * Whole numbers are printed without a fractional part.
/// * Everything else uses the `{:?}` representation with trailing zeros
///   trimmed.
fn format_float(v: f64) -> String {
    if v.is_nan() {
        return String::from("NaN");
    }
    if v.is_infinite() {
        let label = if v.is_sign_positive() {
            "Infinity"
        } else {
            "-Infinity"
        };
        return String::from(label);
    }

    let magnitude = v.abs();
    let wants_scientific = magnitude != 0.0 && (magnitude < 1e-4 || magnitude >= 1e15);
    if wants_scientific {
        let scientific = format!("{:e}", v);
        return match scientific.split_once('e') {
            Some((mantissa, exponent)) => {
                // Only strip zeros that follow a decimal point.
                let mantissa = if mantissa.contains('.') {
                    mantissa.trim_end_matches('0').trim_end_matches('.')
                } else {
                    mantissa
                };
                format!("{}e{}", mantissa, exponent)
            }
            None => scientific,
        };
    }

    if v.fract() == 0.0 {
        return format!("{:.0}", v);
    }

    let plain = format!("{:?}", v);
    let is_exponential = plain.contains('e') || plain.contains('E');
    if plain.contains('.') && !is_exponential {
        plain
            .trim_end_matches('0')
            .trim_end_matches('.')
            .to_string()
    } else {
        plain
    }
}
/// Renders packed little-endian `f32` bytes as `[a, b, c]`.
///
/// Finite whole numbers are printed with one decimal place (`1.0`); all other
/// values use the default `f32` display. Trailing bytes that do not fill a
/// whole 4-byte float are ignored.
pub fn format_vector_bytes(data: &[u8]) -> String {
    use std::fmt::Write;

    let mut out = String::with_capacity((data.len() / 4) * 8 + 2);
    out.push('[');
    for (position, raw) in data.chunks_exact(4).enumerate() {
        if position != 0 {
            out.push_str(", ");
        }
        let component = f32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]);
        let is_whole = component.fract() == 0.0 && component.is_finite();
        // Writing into a `String` cannot fail, so the result is discarded.
        let _ = if is_whole {
            write!(out, "{:.1}", component)
        } else {
            write!(out, "{}", component)
        };
    }
    out.push(']');
    out
}
/// Parses a bracketed, comma-separated list of floats such as `[1, 2.5]`.
///
/// Surrounding whitespace is ignored and `[]` yields an empty vector.
/// Returns `None` when the brackets are missing or any element fails to
/// parse as `f32`.
pub fn parse_vector_str(s: &str) -> Option<Vec<f32>> {
    let body = s.trim().strip_prefix('[')?.strip_suffix(']')?;
    if body.trim().is_empty() {
        return Some(Vec::new());
    }
    body.split(',')
        .map(|element| element.trim().parse::<f32>().ok())
        .collect()
}
/// Total ordering for floats: ordinary numeric order, with NaN equal to NaN
/// and greater than every other value.
fn compare_floats(a: f64, b: f64) -> Ordering {
    // `partial_cmp` only fails when a NaN is involved; ordering the
    // `is_nan` flags (false < true) then places NaN last and ties two NaNs.
    a.partial_cmp(&b)
        .unwrap_or_else(|| a.is_nan().cmp(&b.is_nan()))
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::{Datelike, Timelike};
#[test]
fn test_value_size() {
use std::mem::size_of;
assert_eq!(
size_of::<Value>(),
16,
"Value should be 16 bytes, got {}",
size_of::<Value>()
);
assert_eq!(
size_of::<Option<Value>>(),
16,
"Option<Value> should be 16 bytes (niche optimization), got {}",
size_of::<Option<Value>>()
);
}
#[test]
fn test_constructors() {
assert!(Value::null(DataType::Integer).is_null());
assert_eq!(Value::integer(42).as_int64(), Some(42));
assert_eq!(Value::float(3.5).as_float64(), Some(3.5));
assert_eq!(Value::text("hello").as_str(), Some("hello"));
assert_eq!(Value::boolean(true).as_boolean(), Some(true));
assert!(Value::json(r#"{"key": "value"}"#).as_json().is_some());
}
#[test]
fn test_from_implementations() {
let v: Value = 42i64.into();
assert_eq!(v.as_int64(), Some(42));
let v: Value = 3.5f64.into();
assert_eq!(v.as_float64(), Some(3.5));
let v: Value = "hello".into();
assert_eq!(v.as_str(), Some("hello"));
let v: Value = true.into();
assert_eq!(v.as_boolean(), Some(true));
let v: Value = Option::<i64>::None.into();
assert!(v.is_null());
let v: Value = Some(42i64).into();
assert_eq!(v.as_int64(), Some(42));
}
#[test]
fn test_data_type() {
assert_eq!(
Value::null(DataType::Integer).data_type(),
DataType::Integer
);
assert_eq!(Value::integer(42).data_type(), DataType::Integer);
assert_eq!(Value::float(3.5).data_type(), DataType::Float);
assert_eq!(Value::text("hello").data_type(), DataType::Text);
assert_eq!(Value::boolean(true).data_type(), DataType::Boolean);
assert_eq!(
Value::Timestamp(Utc::now()).data_type(),
DataType::Timestamp
);
assert_eq!(Value::json("{}").data_type(), DataType::Json);
}
#[test]
fn test_as_int64() {
assert_eq!(Value::integer(42).as_int64(), Some(42));
assert_eq!(Value::float(3.7).as_int64(), Some(3));
assert_eq!(Value::float(-3.7).as_int64(), Some(-3));
assert_eq!(Value::text("42").as_int64(), Some(42));
assert_eq!(Value::text("-42").as_int64(), Some(-42));
assert_eq!(Value::text("3.7").as_int64(), Some(3));
assert_eq!(Value::boolean(true).as_int64(), Some(1));
assert_eq!(Value::boolean(false).as_int64(), Some(0));
assert_eq!(Value::null(DataType::Integer).as_int64(), None);
assert_eq!(Value::text("not a number").as_int64(), None);
}
#[test]
fn test_as_float64() {
assert_eq!(Value::float(3.5).as_float64(), Some(3.5));
assert_eq!(Value::integer(42).as_float64(), Some(42.0));
assert_eq!(Value::text("3.5").as_float64(), Some(3.5));
assert_eq!(Value::boolean(true).as_float64(), Some(1.0));
assert_eq!(Value::boolean(false).as_float64(), Some(0.0));
assert_eq!(Value::null(DataType::Float).as_float64(), None);
}
#[test]
fn test_as_boolean() {
assert_eq!(Value::boolean(true).as_boolean(), Some(true));
assert_eq!(Value::boolean(false).as_boolean(), Some(false));
assert_eq!(Value::integer(1).as_boolean(), Some(true));
assert_eq!(Value::integer(0).as_boolean(), Some(false));
assert_eq!(Value::integer(-1).as_boolean(), Some(true));
assert_eq!(Value::float(1.0).as_boolean(), Some(true));
assert_eq!(Value::float(0.0).as_boolean(), Some(false));
assert_eq!(Value::text("true").as_boolean(), Some(true));
assert_eq!(Value::text("TRUE").as_boolean(), Some(true));
assert_eq!(Value::text("t").as_boolean(), Some(true));
assert_eq!(Value::text("yes").as_boolean(), Some(true));
assert_eq!(Value::text("y").as_boolean(), Some(true));
assert_eq!(Value::text("1").as_boolean(), Some(true));
assert_eq!(Value::text("false").as_boolean(), Some(false));
assert_eq!(Value::text("FALSE").as_boolean(), Some(false));
assert_eq!(Value::text("f").as_boolean(), Some(false));
assert_eq!(Value::text("no").as_boolean(), Some(false));
assert_eq!(Value::text("n").as_boolean(), Some(false));
assert_eq!(Value::text("0").as_boolean(), Some(false));
assert_eq!(Value::text("").as_boolean(), Some(false));
assert_eq!(Value::text("42").as_boolean(), Some(true));
assert_eq!(Value::text("0.0").as_boolean(), Some(false));
}
#[test]
fn test_as_string() {
assert_eq!(Value::text("hello").as_string(), Some("hello".to_string()));
assert_eq!(Value::integer(42).as_string(), Some("42".to_string()));
assert_eq!(Value::float(3.5).as_string(), Some("3.5".to_string()));
assert_eq!(Value::boolean(true).as_string(), Some("true".to_string()));
assert_eq!(Value::boolean(false).as_string(), Some("false".to_string()));
assert_eq!(Value::null(DataType::Text).as_string(), None);
}
#[test]
fn test_equality() {
assert_eq!(Value::integer(42), Value::integer(42));
assert_ne!(Value::integer(42), Value::integer(43));
assert_eq!(Value::float(3.5), Value::float(3.5));
assert_ne!(Value::float(3.5), Value::float(3.15));
assert_eq!(Value::text("hello"), Value::text("hello"));
assert_ne!(Value::text("hello"), Value::text("world"));
assert_eq!(Value::boolean(true), Value::boolean(true));
assert_ne!(Value::boolean(true), Value::boolean(false));
assert_eq!(Value::null(DataType::Integer), Value::null(DataType::Float));
assert_ne!(Value::null(DataType::Integer), Value::integer(0));
assert_eq!(Value::integer(1), Value::float(1.0));
assert_eq!(Value::integer(5), Value::float(5.0));
assert_ne!(Value::integer(1), Value::float(1.5));
assert_ne!(Value::text("1"), Value::integer(1));
}
#[test]
fn test_float_nan_equality() {
let nan = Value::float(f64::NAN);
assert_eq!(nan, nan.clone());
}
#[test]
fn test_compare_integers() {
assert_eq!(
Value::integer(1).compare(&Value::integer(2)).unwrap(),
Ordering::Less
);
assert_eq!(
Value::integer(2).compare(&Value::integer(2)).unwrap(),
Ordering::Equal
);
assert_eq!(
Value::integer(3).compare(&Value::integer(2)).unwrap(),
Ordering::Greater
);
}
#[test]
fn test_compare_floats() {
assert_eq!(
Value::float(1.0).compare(&Value::float(2.0)).unwrap(),
Ordering::Less
);
assert_eq!(
Value::float(2.0).compare(&Value::float(2.0)).unwrap(),
Ordering::Equal
);
assert_eq!(
Value::float(3.0).compare(&Value::float(2.0)).unwrap(),
Ordering::Greater
);
}
/// Asserts that `compare` works across integer and float operands in either position.
#[test]
fn test_compare_cross_type_numeric() {
    let cases = [
        (Value::integer(1), Value::float(2.0), Ordering::Less),
        (Value::integer(2), Value::float(2.0), Ordering::Equal),
        (Value::float(3.0), Value::integer(2), Ordering::Greater),
    ];
    for (lhs, rhs, expected) in cases {
        assert_eq!(lhs.compare(&rhs).unwrap(), expected);
    }
}
/// Asserts that `compare` orders text values "a", "b" and "c" against the pivot "b".
#[test]
fn test_compare_strings() {
    let pivot = Value::text("b");
    let inputs = ["a", "b", "c"];
    let expected = [Ordering::Less, Ordering::Equal, Ordering::Greater];
    for (input, want) in inputs.iter().zip(expected.iter()) {
        let actual = Value::text(*input).compare(&pivot).unwrap();
        assert_eq!(actual, *want);
    }
}
/// Asserts that two NULLs compare as equal regardless of declared type, while
/// comparing a NULL with a non-NULL value is an error in either direction.
#[test]
fn test_compare_null() {
    let null_int = Value::null(DataType::Integer);
    let null_float = Value::null(DataType::Float);
    let zero = Value::integer(0);

    let null_vs_null = null_int.compare(&null_float).unwrap();
    assert_eq!(null_vs_null, Ordering::Equal);

    assert!(null_int.compare(&zero).is_err());
    assert!(zero.compare(&null_int).is_err());
}
/// Asserts that comparing two different JSON values is an error, while
/// comparing JSON values built from the same text yields `Ordering::Equal`.
#[test]
fn test_compare_json_error() {
    let base = Value::json(r#"{"a": 1}"#);
    let different = Value::json(r#"{"b": 2}"#);
    let identical = Value::json(r#"{"a": 1}"#);

    let mismatch = base.compare(&different);
    assert!(mismatch.is_err());

    let same = base.compare(&identical).unwrap();
    assert_eq!(same, Ordering::Equal);
}
/// Exercises `parse_timestamp` on an ISO-8601 string with a `Z` suffix, a
/// space-separated date-time, a bare date, and an unparseable string.
#[test]
fn test_parse_timestamp() {
    let zulu = parse_timestamp("2024-01-15T10:30:00Z").unwrap();
    assert_eq!((zulu.year(), zulu.month(), zulu.day()), (2024, 1, 15));
    assert_eq!((zulu.hour(), zulu.minute()), (10, 30));

    let spaced = parse_timestamp("2024-01-15 10:30:00").unwrap();
    assert_eq!(spaced.year(), 2024);

    // The test expects a date without a time component to have hour 0.
    let date_only = parse_timestamp("2024-01-15").unwrap();
    assert_eq!((date_only.year(), date_only.hour()), (2024, 0));

    let garbage = parse_timestamp("not a date");
    assert!(garbage.is_err());
}
/// Asserts the `to_string` rendering of NULL, integer, float, text and boolean values.
#[test]
fn test_display() {
    let cases = [
        (Value::null(DataType::Integer), "NULL"),
        (Value::integer(42), "42"),
        (Value::float(3.5), "3.5"),
        (Value::text("hello"), "hello"),
        (Value::boolean(true), "true"),
        (Value::boolean(false), "false"),
    ];
    for (value, rendered) in cases {
        assert_eq!(value.to_string(), rendered);
    }
}
/// Asserts that inserting a duplicate integer `Value` into a hash set does not
/// grow it, and that both distinct values can be looked up afterwards.
#[test]
fn test_hash() {
    use rustc_hash::FxHashSet;

    let mut seen = FxHashSet::default();
    // 42 is inserted twice on purpose; the second insert must be a no-op.
    for n in [42, 42, 43] {
        seen.insert(Value::integer(n));
    }
    assert_eq!(seen.len(), 2);

    for n in [42, 43] {
        assert!(seen.contains(&Value::integer(n)));
    }
}
/// Asserts that an integer `Value` hashes identically to the float `Value`
/// obtained by casting it to `f64`, and that a fractional float does not hash
/// like its neighbouring integers.
#[test]
fn test_hash_integer_float_consistency() {
    use std::hash::{DefaultHasher, Hash, Hasher};

    /// Hashes a single value with a freshly constructed `DefaultHasher`.
    fn hash_value(v: &Value) -> u64 {
        let mut state = DefaultHasher::new();
        v.hash(&mut state);
        state.finish()
    }

    // Small integers versus the float they cast to.
    for n in [5_i64, -100, 0] {
        let int_hash = hash_value(&Value::integer(n));
        let float_hash = hash_value(&Value::float(n as f64));
        assert_eq!(int_hash, float_hash);
    }

    // A fractional float must not hash like either adjacent integer.
    let fractional_hash = hash_value(&Value::float(5.5));
    for n in [5_i64, 6] {
        assert_ne!(fractional_hash, hash_value(&Value::integer(n)));
    }

    // Around 2^53: 2^53 + 1 has no exact f64 representation, so the last case
    // compares the integer against whatever float the cast rounds to.
    let boundary = 1_i64 << 53;
    let safe_max = boundary - 1;
    let large = boundary + 1;
    for n in [safe_max, -safe_max, boundary, large] {
        let int_hash = hash_value(&Value::integer(n));
        let float_hash = hash_value(&Value::float(n as f64));
        assert_eq!(int_hash, float_hash);
    }
}
/// Asserts that `Value::integer(42)` and `Value::float(42.0)` address the same
/// hash-map entry: each can look up and overwrite what the other inserted.
#[test]
fn test_hash_in_hashmap() {
    use rustc_hash::FxHashMap;

    let int_key = Value::integer(42);
    let float_key = Value::float(42.0);
    let mut labels = FxHashMap::default();

    labels.insert(int_key.clone(), "int");
    assert_eq!(labels.get(&float_key).copied(), Some("int"));

    // Inserting under the float key must replace the entry, not add a second one.
    labels.insert(float_key, "float");
    assert_eq!(labels.len(), 1);
    assert_eq!(labels.get(&int_key).copied(), Some("float"));
}
/// Asserts that NaNs produced three different ways (the `f64::NAN` constant,
/// an explicit bit pattern, and `INFINITY - INFINITY`) hash identically and
/// compare equal as `Value`s.
#[test]
fn test_hash_nan_consistency() {
    use std::hash::{DefaultHasher, Hash, Hasher};

    /// Hashes a single value with a freshly constructed `DefaultHasher`.
    fn hash_value(v: &Value) -> u64 {
        let mut state = DefaultHasher::new();
        v.hash(&mut state);
        state.finish()
    }

    let nans = [
        Value::float(f64::NAN),
        Value::float(f64::from_bits(0x7ff8000000000001)),
        Value::float(f64::INFINITY - f64::INFINITY),
    ];

    // Adjacent pairs: (first, second) then (second, third).
    for pair in nans.windows(2) {
        assert_eq!(hash_value(&pair[0]), hash_value(&pair[1]));
    }
    for pair in nans.windows(2) {
        assert_eq!(pair[0], pair[1]);
    }
}
}