use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashMap};
use std::fmt;
use std::hash::{Hash, Hasher};
#[derive(Debug, Clone)]
pub enum FilterCondition {
Equals(Value),
NotEquals(Value),
GreaterThan(Value),
GreaterThanEquals(Value),
LessThan(Value),
LessThanEquals(Value),
In(Vec<Value>),
Between(Value, Value), IsNull,
IsNotNull,
Contains(Value),
StartsWith(Value),
EndsWith(Value),
Regex(String),
Not(Box<FilterCondition>),
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum Value {
UniqueId(u32),
Int64(i64),
Float64(f64),
String(String),
Boolean(bool),
DateTime(NaiveDate),
Point {
lat: f64,
lon: f64,
},
Null,
NodeRef(u32),
Duration {
months: i32,
days: i32,
seconds: i64,
},
Node(Box<NodeValue>),
Relationship(Box<RelValue>),
Path(Box<PathValue>),
List(Vec<Value>),
Map(BTreeMap<String, Value>),
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct NodeValue {
pub id: u32,
pub labels: Vec<String>,
pub properties: BTreeMap<String, Value>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RelValue {
pub id: u32,
pub start_id: u32,
pub end_id: u32,
pub rel_type: String,
pub properties: BTreeMap<String, Value>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PathValue {
pub nodes: Vec<NodeValue>,
pub rels: Vec<RelValue>,
}
#[derive(Clone, Copy, Debug)]
pub enum BorrowedValue<'a> {
Null,
Boolean(bool),
Int64(i64),
Float64(f64),
UniqueId(u32),
String(&'a str),
DateTime(NaiveDate),
}
impl<'a> BorrowedValue<'a> {
pub fn to_value(self) -> Value {
match self {
BorrowedValue::Null => Value::Null,
BorrowedValue::Boolean(b) => Value::Boolean(b),
BorrowedValue::Int64(v) => Value::Int64(v),
BorrowedValue::Float64(v) => Value::Float64(v),
BorrowedValue::UniqueId(v) => Value::UniqueId(v),
BorrowedValue::String(s) => Value::String(s.to_string()),
BorrowedValue::DateTime(d) => Value::DateTime(d),
}
}
}
impl Eq for Value {
}
impl PartialOrd for Value {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for Value {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
use std::cmp::Ordering;
fn disc(v: &Value) -> u8 {
match v {
Value::Null => 0,
Value::Boolean(_) => 1,
Value::UniqueId(_) => 2,
Value::Int64(_) => 3,
Value::Float64(_) => 4,
Value::String(_) => 5,
Value::DateTime(_) => 6,
Value::Duration { .. } => 7,
Value::Point { .. } => 8,
Value::NodeRef(_) => 9,
Value::List(_) => 10,
Value::Map(_) => 11,
Value::Node(_) => 12,
Value::Relationship(_) => 13,
Value::Path(_) => 14,
}
}
match (self, other) {
(Value::Null, Value::Null) => Ordering::Equal,
(Value::Boolean(a), Value::Boolean(b)) => a.cmp(b),
(Value::UniqueId(a), Value::UniqueId(b)) => a.cmp(b),
(Value::Int64(a), Value::Int64(b)) => a.cmp(b),
(Value::Float64(a), Value::Float64(b)) => {
a.partial_cmp(b).unwrap_or_else(|| {
match (a.is_nan(), b.is_nan()) {
(true, true) => Ordering::Equal,
(true, false) => Ordering::Greater,
(false, true) => Ordering::Less,
_ => unreachable!(),
}
})
}
(Value::String(a), Value::String(b)) => a.cmp(b),
(Value::DateTime(a), Value::DateTime(b)) => a.cmp(b),
(
Value::Point {
lat: a_lat,
lon: a_lon,
},
Value::Point {
lat: b_lat,
lon: b_lon,
},
) => a_lat
.partial_cmp(b_lat)
.unwrap_or(Ordering::Equal)
.then(a_lon.partial_cmp(b_lon).unwrap_or(Ordering::Equal)),
(Value::NodeRef(a), Value::NodeRef(b)) => a.cmp(b),
(
Value::Duration {
months: am,
days: ad,
seconds: as_,
},
Value::Duration {
months: bm,
days: bd,
seconds: bs,
},
) => am.cmp(bm).then(ad.cmp(bd)).then(as_.cmp(bs)),
(Value::List(a), Value::List(b)) => a.cmp(b),
(Value::Map(a), Value::Map(b)) => a.cmp(b),
(Value::Node(a), Value::Node(b)) => a.cmp(b),
(Value::Relationship(a), Value::Relationship(b)) => a.cmp(b),
(Value::Path(a), Value::Path(b)) => a.cmp(b),
_ => disc(self).cmp(&disc(other)),
}
}
}
impl Hash for Value {
fn hash<H: Hasher>(&self, state: &mut H) {
std::mem::discriminant(self).hash(state);
match self {
Value::UniqueId(v) => v.hash(state),
Value::Int64(v) => v.hash(state),
Value::Float64(v) => {
if v.is_nan() {
f64::NAN.to_bits().hash(state)
} else {
if *v == 0.0 {
0.0f64.to_bits().hash(state)
} else {
v.to_bits().hash(state)
}
}
}
Value::String(v) => v.hash(state),
Value::Boolean(v) => v.hash(state),
Value::DateTime(v) => v.hash(state),
Value::Point { lat, lon } => {
lat.to_bits().hash(state);
lon.to_bits().hash(state);
}
Value::Duration {
months,
days,
seconds,
} => {
months.hash(state);
days.hash(state);
seconds.hash(state);
}
Value::Null => 0.hash(state),
Value::NodeRef(v) => v.hash(state),
Value::List(v) => v.hash(state),
Value::Map(v) => {
v.len().hash(state);
for (k, val) in v {
k.hash(state);
val.hash(state);
}
}
Value::Node(v) => v.hash(state),
Value::Relationship(v) => v.hash(state),
Value::Path(v) => v.hash(state),
}
}
}
impl Value {
pub fn as_string(&self) -> Option<String> {
match self {
Value::String(s) => Some(s.clone()),
_ => None,
}
}
pub fn type_name(&self) -> &'static str {
match self {
Value::Null => "Null",
Value::Boolean(_) => "Boolean",
Value::Int64(_) => "Int64",
Value::Float64(_) => "Float64",
Value::UniqueId(_) => "UniqueId",
Value::String(_) => "String",
Value::DateTime(_) => "DateTime",
Value::Point { .. } => "Point",
Value::NodeRef(_) => "NodeRef",
Value::Duration { .. } => "Duration",
Value::List(_) => "List",
Value::Map(_) => "Map",
Value::Node(_) => "Node",
Value::Relationship(_) => "Relationship",
Value::Path(_) => "Path",
}
}
}
impl fmt::Display for Value {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", format_value(self))
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum ColumnType {
UniqueId,
Int64,
Float64,
String,
Boolean,
DateTime,
}
impl fmt::Display for ColumnType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let type_str = match self {
ColumnType::UniqueId => "UniqueId",
ColumnType::Int64 => "Int64",
ColumnType::Float64 => "Float64",
ColumnType::String => "String",
ColumnType::Boolean => "Boolean",
ColumnType::DateTime => "DateTime",
};
write!(f, "{}", type_str)
}
}
#[derive(Debug)]
pub struct Column {
pub(crate) name: String,
pub(crate) col_type: ColumnType,
pub(crate) data: ColumnData,
}
#[derive(Debug)]
pub enum ColumnData {
UniqueId(Vec<Option<u32>>),
Int64(Vec<Option<i64>>),
Float64(Vec<Option<f64>>),
String(Vec<Option<String>>),
Boolean(Vec<Option<bool>>),
DateTime(Vec<Option<NaiveDate>>),
}
#[derive(Debug)]
pub struct DataFrame {
columns: Vec<Column>,
column_indices: HashMap<String, usize>,
}
impl Column {
fn get_value(&self, row_idx: usize) -> Option<Value> {
match &self.data {
ColumnData::UniqueId(vec) => vec.get(row_idx)?.map(Value::UniqueId),
ColumnData::Int64(vec) => vec.get(row_idx)?.map(Value::Int64),
ColumnData::Float64(vec) => vec.get(row_idx)?.map(Value::Float64),
ColumnData::String(vec) => vec.get(row_idx)?.as_ref().map(|s| Value::String(s.clone())),
ColumnData::Boolean(vec) => vec.get(row_idx)?.map(Value::Boolean),
ColumnData::DateTime(vec) => vec.get(row_idx)?.map(Value::DateTime),
}
}
fn len(&self) -> usize {
match &self.data {
ColumnData::UniqueId(vec) => vec.len(),
ColumnData::Int64(vec) => vec.len(),
ColumnData::Float64(vec) => vec.len(),
ColumnData::String(vec) => vec.len(),
ColumnData::Boolean(vec) => vec.len(),
ColumnData::DateTime(vec) => vec.len(),
}
}
}
impl DataFrame {
pub fn new(columns: Vec<(String, ColumnType)>) -> Self {
let mut column_indices = HashMap::with_capacity(columns.len());
let columns: Vec<Column> = columns
.into_iter()
.enumerate()
.map(|(idx, (name, col_type))| {
let data = match col_type {
ColumnType::UniqueId => ColumnData::UniqueId(Vec::new()),
ColumnType::Int64 => ColumnData::Int64(Vec::new()),
ColumnType::Float64 => ColumnData::Float64(Vec::new()),
ColumnType::String => ColumnData::String(Vec::new()),
ColumnType::Boolean => ColumnData::Boolean(Vec::new()),
ColumnType::DateTime => ColumnData::DateTime(Vec::new()),
};
column_indices.insert(name.clone(), idx);
Column {
name,
col_type,
data,
}
})
.collect();
DataFrame {
columns,
column_indices,
}
}
pub fn get_value(&self, row: usize, column: &str) -> Option<Value> {
self.column_indices
.get(column)
.and_then(|&idx| self.columns.get(idx))
.and_then(|col| col.get_value(row))
}
pub fn get_value_by_index(&self, row_idx: usize, col_idx: usize) -> Option<Value> {
self.columns
.get(col_idx)
.and_then(|col| col.get_value(row_idx))
}
pub fn get_column_index(&self, name: &str) -> Option<usize> {
self.column_indices.get(name).copied()
}
pub fn verify_column(&self, name: &str) -> bool {
self.column_indices.contains_key(name)
}
pub fn row_count(&self) -> usize {
self.columns.first().map_or(0, |col| col.len())
}
pub fn column_count(&self) -> usize {
self.columns.len()
}
pub fn get_column_names(&self) -> Vec<String> {
self.columns.iter().map(|col| col.name.clone()).collect()
}
pub fn get_column_type(&self, col_name: &str) -> ColumnType {
self.column_indices
.get(col_name)
.and_then(|&idx| self.columns.get(idx))
.map(|col| col.col_type.clone())
.unwrap_or_else(|| panic!("Column {} not found", col_name))
}
pub fn add_column(
&mut self,
name: String,
col_type: ColumnType,
data: ColumnData,
) -> Result<(), String> {
if self.column_indices.contains_key(&name) {
return Err(format!("Column {} already exists", name));
}
match (&col_type, &data) {
(ColumnType::UniqueId, ColumnData::UniqueId(_))
| (ColumnType::Int64, ColumnData::Int64(_))
| (ColumnType::Float64, ColumnData::Float64(_))
| (ColumnType::String, ColumnData::String(_))
| (ColumnType::Boolean, ColumnData::Boolean(_))
| (ColumnType::DateTime, ColumnData::DateTime(_)) => (),
_ => return Err(format!("Data type mismatch for column {}", name)),
}
let idx = self.columns.len();
self.column_indices.insert(name.clone(), idx);
self.columns.push(Column {
name,
col_type,
data,
});
Ok(())
}
pub fn from_cypher_rows(columns: Vec<String>, rows: Vec<Vec<Value>>) -> Result<Self, String> {
let num_cols = columns.len();
let num_rows = rows.len();
if num_rows == 0 {
let col_specs: Vec<(String, ColumnType)> = columns
.into_iter()
.map(|name| (name, ColumnType::Int64))
.collect();
return Ok(DataFrame::new(col_specs));
}
for (i, row) in rows.iter().enumerate() {
if row.len() != num_cols {
return Err(format!(
"Row {} has {} values but expected {} columns",
i,
row.len(),
num_cols
));
}
}
let mut col_types = vec![None; num_cols];
for row in &rows {
for (col_idx, val) in row.iter().enumerate() {
if col_types[col_idx].is_some() {
continue;
}
col_types[col_idx] = match val {
Value::UniqueId(_) => Some(ColumnType::UniqueId),
Value::Int64(_) => Some(ColumnType::Int64),
Value::Float64(_) => Some(ColumnType::Float64),
Value::String(_) => Some(ColumnType::String),
Value::Boolean(_) => Some(ColumnType::Boolean),
Value::DateTime(_) => Some(ColumnType::DateTime),
Value::Point { .. } => Some(ColumnType::String), Value::Duration { .. } => Some(ColumnType::String),
Value::List(_)
| Value::Map(_)
| Value::Node(_)
| Value::Relationship(_)
| Value::Path(_) => Some(ColumnType::String),
Value::Null | Value::NodeRef(_) => None,
};
}
if col_types.iter().all(|t| t.is_some()) {
break;
}
}
let col_types: Vec<ColumnType> = col_types
.into_iter()
.map(|t| t.unwrap_or(ColumnType::Int64))
.collect();
let mut col_data: Vec<ColumnData> = col_types
.iter()
.map(|ct| match ct {
ColumnType::UniqueId => ColumnData::UniqueId(Vec::with_capacity(num_rows)),
ColumnType::Int64 => ColumnData::Int64(Vec::with_capacity(num_rows)),
ColumnType::Float64 => ColumnData::Float64(Vec::with_capacity(num_rows)),
ColumnType::String => ColumnData::String(Vec::with_capacity(num_rows)),
ColumnType::Boolean => ColumnData::Boolean(Vec::with_capacity(num_rows)),
ColumnType::DateTime => ColumnData::DateTime(Vec::with_capacity(num_rows)),
})
.collect();
for row in rows {
for (col_idx, val) in row.into_iter().enumerate() {
match &mut col_data[col_idx] {
ColumnData::UniqueId(vec) => match val {
Value::UniqueId(v) => vec.push(Some(v)),
Value::Int64(v) => vec.push(Some(v as u32)),
Value::Null => vec.push(None),
_ => vec.push(None),
},
ColumnData::Int64(vec) => match val {
Value::Int64(v) => vec.push(Some(v)),
Value::UniqueId(v) => vec.push(Some(v as i64)),
Value::Float64(v) => vec.push(Some(v as i64)),
Value::Null => vec.push(None),
_ => vec.push(None),
},
ColumnData::Float64(vec) => match val {
Value::Float64(v) => vec.push(Some(v)),
Value::Int64(v) => vec.push(Some(v as f64)),
Value::UniqueId(v) => vec.push(Some(v as f64)),
Value::Null => vec.push(None),
_ => vec.push(None),
},
ColumnData::String(vec) => match val {
Value::String(v) => vec.push(Some(v)),
Value::Point { lat, lon } => {
vec.push(Some(format!("POINT({} {})", lon, lat)))
}
Value::Int64(v) => vec.push(Some(v.to_string())),
Value::Float64(v) => vec.push(Some(v.to_string())),
Value::UniqueId(v) => vec.push(Some(v.to_string())),
Value::Boolean(v) => vec.push(Some(v.to_string())),
Value::DateTime(v) => vec.push(Some(v.to_string())),
Value::Null => vec.push(None),
_ => vec.push(None),
},
ColumnData::Boolean(vec) => match val {
Value::Boolean(v) => vec.push(Some(v)),
Value::Null => vec.push(None),
_ => vec.push(None),
},
ColumnData::DateTime(vec) => match val {
Value::DateTime(v) => vec.push(Some(v)),
Value::Null => vec.push(None),
_ => vec.push(None),
},
}
}
}
let mut column_indices = HashMap::with_capacity(num_cols);
let built_columns: Vec<Column> = columns
.into_iter()
.zip(col_types)
.zip(col_data)
.enumerate()
.map(|(idx, ((name, col_type), data))| {
column_indices.insert(name.clone(), idx);
Column {
name,
col_type,
data,
}
})
.collect();
Ok(DataFrame {
columns: built_columns,
column_indices,
})
}
pub fn add_constant_column(&mut self, name: String, value: Value) -> Result<(), String> {
let num_rows = self.row_count();
let (col_type, data) = match value {
Value::UniqueId(v) => (
ColumnType::UniqueId,
ColumnData::UniqueId(vec![Some(v); num_rows]),
),
Value::Int64(v) => (
ColumnType::Int64,
ColumnData::Int64(vec![Some(v); num_rows]),
),
Value::Float64(v) => (
ColumnType::Float64,
ColumnData::Float64(vec![Some(v); num_rows]),
),
Value::String(v) => (
ColumnType::String,
ColumnData::String(vec![Some(v); num_rows]),
),
Value::Boolean(v) => (
ColumnType::Boolean,
ColumnData::Boolean(vec![Some(v); num_rows]),
),
Value::DateTime(v) => (
ColumnType::DateTime,
ColumnData::DateTime(vec![Some(v); num_rows]),
),
Value::Null => return Err("Cannot add a constant column with Null value".to_string()),
Value::Point { lat, lon } => (
ColumnType::String,
ColumnData::String(vec![Some(format!("POINT({} {})", lon, lat)); num_rows]),
),
Value::NodeRef(_) => {
return Err("Cannot add a constant column with NodeRef value".to_string())
}
Value::Duration { .. } => {
return Err(
"Cannot add a constant column with Duration value — durations are \
query-time-only (0.9.0 Cluster 2)"
.to_string(),
)
}
Value::List(_)
| Value::Map(_)
| Value::Node(_)
| Value::Relationship(_)
| Value::Path(_) => {
return Err(
"Cannot add a constant column with List/Map/Node/Relationship/Path value \
— collection and graph-entity variants are query-result-time values, \
not column types"
.to_string(),
)
}
};
self.add_column(name, col_type, data)
}
}
impl std::fmt::Display for DataFrame {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let row_limit = 10.min(self.row_count());
let columns = self.get_column_names();
let mut col_widths: Vec<usize> = columns.iter().map(|col| col.len()).collect();
for (col_idx, col) in self.columns.iter().enumerate() {
let type_width = format_col_type(&col.col_type).len();
col_widths[col_idx] = col_widths[col_idx].max(type_width);
for row_idx in 0..row_limit {
if let Some(value) = col.get_value(row_idx) {
col_widths[col_idx] = col_widths[col_idx].max(format_value(&value).len());
}
}
}
let format_row = |values: Vec<String>| -> String {
values
.into_iter()
.enumerate()
.map(|(i, val)| format!(" {:^width$} ", val, width = col_widths[i]))
.collect::<Vec<_>>()
.join("|")
};
writeln!(f, "\n| # |{}|", format_row(columns))?;
let type_row: Vec<String> = self
.columns
.iter()
.map(|col| format_col_type(&col.col_type))
.collect();
writeln!(f, "| |{}|", format_row(type_row))?;
let separator = col_widths
.iter()
.map(|w| format!("{:-^width$}", "-", width = w + 2))
.collect::<Vec<_>>()
.join("|");
writeln!(f, "|----|{}|", separator)?;
for row_idx in 0..row_limit {
let row_data: Vec<String> = (0..self.column_count())
.map(|col_idx| {
format_value(
&self
.get_value_by_index(row_idx, col_idx)
.unwrap_or(Value::Null),
)
})
.collect();
writeln!(f, "| {:^2} |{}|", row_idx, format_row(row_data))?;
}
if self.row_count() > row_limit {
let more_row = format_row(col_widths.iter().map(|_| "...".to_string()).collect());
writeln!(f, "| .. |{}|", more_row)?;
}
Ok(())
}
}
pub fn raw_string(value: &Value) -> String {
match value {
Value::String(s) => s.clone(),
Value::Int64(n) => n.to_string(),
Value::Float64(f) => f.to_string(),
Value::Boolean(b) => b.to_string(),
Value::DateTime(dt) => dt.to_string(),
Value::UniqueId(id) => id.to_string(),
Value::Point { lat, lon } => format!("point({}, {})", lat, lon),
Value::Duration {
months,
days,
seconds,
} => format!("duration(M={}, D={}, S={})", months, days, seconds),
Value::Null => String::new(),
Value::NodeRef(idx) => format!("node#{}", idx),
Value::List(_)
| Value::Map(_)
| Value::Node(_)
| Value::Relationship(_)
| Value::Path(_) => format_value(value),
}
}
pub fn format_value(value: &Value) -> String {
match value {
Value::UniqueId(v) => format!("{}", v),
Value::Int64(v) => format!("{}", v),
Value::Float64(v) => {
if v.is_nan() {
"NULL".to_string()
} else {
format!("{:.2}", v)
}
}
Value::String(v) => format!("\"{}\"", v),
Value::Boolean(v) => format!("{}", v),
Value::DateTime(v) => format!("\"{}\"", v.format("%Y-%m-%d")),
Value::Point { lat, lon } => format!("point({}, {})", lat, lon),
Value::Null => "NULL".to_string(),
Value::NodeRef(idx) => format!("node#{}", idx),
Value::Duration {
months,
days,
seconds,
} => format!(
"duration(months={}, days={}, seconds={})",
months, days, seconds
),
Value::List(items) => {
let inner: Vec<String> = items.iter().map(format_value).collect();
format!("[{}]", inner.join(", "))
}
Value::Map(entries) => {
let inner: Vec<String> = entries
.iter()
.map(|(k, v)| format!("{}: {}", k, format_value(v)))
.collect();
format!("{{{}}}", inner.join(", "))
}
Value::Node(n) => {
format!("(:{} {{id: {}}})", n.labels.join(":"), n.id)
}
Value::Relationship(r) => {
format!(
"[:{} {{id: {}, start: {}, end: {}}}]",
r.rel_type, r.id, r.start_id, r.end_id
)
}
Value::Path(p) => {
format!("path(nodes={}, rels={})", p.nodes.len(), p.rels.len())
}
}
}
fn format_col_type(col_type: &ColumnType) -> String {
match col_type {
ColumnType::UniqueId => "uID",
ColumnType::Int64 => "i64",
ColumnType::Float64 => "f64",
ColumnType::String => "str",
ColumnType::Boolean => "bool",
ColumnType::DateTime => "datetime",
}
.to_string()
}
#[cfg(test)]
#[allow(clippy::approx_constant)]
mod tests {
use super::*;
#[test]
fn test_as_string_with_string_value() {
let v = Value::String("hello".to_string());
assert_eq!(v.as_string(), Some("hello".to_string()));
}
#[test]
fn test_as_string_with_non_string_values() {
assert_eq!(Value::Int64(42).as_string(), None);
assert_eq!(Value::Float64(3.14).as_string(), None);
assert_eq!(Value::Boolean(true).as_string(), None);
assert_eq!(Value::Null.as_string(), None);
assert_eq!(Value::UniqueId(1).as_string(), None);
}
#[test]
fn test_value_equality_same_types() {
assert_eq!(Value::Int64(42), Value::Int64(42));
assert_eq!(Value::Float64(3.14), Value::Float64(3.14));
assert_eq!(
Value::String("a".to_string()),
Value::String("a".to_string())
);
assert_eq!(Value::Boolean(true), Value::Boolean(true));
assert_eq!(Value::Null, Value::Null);
assert_eq!(Value::UniqueId(5), Value::UniqueId(5));
}
#[test]
fn test_value_inequality() {
assert_ne!(Value::Int64(1), Value::Int64(2));
assert_ne!(
Value::String("a".to_string()),
Value::String("b".to_string())
);
assert_ne!(Value::Boolean(true), Value::Boolean(false));
}
#[test]
fn test_value_hash_consistency() {
use std::collections::HashSet;
let mut set = HashSet::new();
set.insert(Value::Int64(42));
set.insert(Value::Int64(42)); assert_eq!(set.len(), 1);
set.insert(Value::String("test".to_string()));
assert_eq!(set.len(), 2);
set.insert(Value::Null);
set.insert(Value::Null); assert_eq!(set.len(), 3);
}
#[test]
fn test_float_hash_negative_zero() {
use std::collections::HashSet;
let mut set = HashSet::new();
set.insert(Value::Float64(0.0));
set.insert(Value::Float64(-0.0));
assert_eq!(set.len(), 1);
}
#[test]
fn test_format_value_types() {
assert_eq!(format_value(&Value::UniqueId(42)), "42");
assert_eq!(format_value(&Value::Int64(-5)), "-5");
assert_eq!(format_value(&Value::Float64(3.14)), "3.14");
assert_eq!(format_value(&Value::String("hi".to_string())), "\"hi\"");
assert_eq!(format_value(&Value::Boolean(true)), "true");
assert_eq!(format_value(&Value::Null), "NULL");
}
#[test]
fn test_format_value_nan_is_null() {
assert_eq!(format_value(&Value::Float64(f64::NAN)), "NULL");
}
#[test]
fn test_column_type_display() {
assert_eq!(format!("{}", ColumnType::UniqueId), "UniqueId");
assert_eq!(format!("{}", ColumnType::Int64), "Int64");
assert_eq!(format!("{}", ColumnType::Float64), "Float64");
assert_eq!(format!("{}", ColumnType::String), "String");
assert_eq!(format!("{}", ColumnType::Boolean), "Boolean");
assert_eq!(format!("{}", ColumnType::DateTime), "DateTime");
}
#[test]
fn test_dataframe_new_empty() {
let df = DataFrame::new(vec![
("id".to_string(), ColumnType::Int64),
("name".to_string(), ColumnType::String),
]);
assert_eq!(df.row_count(), 0);
assert_eq!(df.column_count(), 2);
assert!(df.verify_column("id"));
assert!(df.verify_column("name"));
assert!(!df.verify_column("missing"));
}
#[test]
fn test_dataframe_column_names() {
let df = DataFrame::new(vec![
("a".to_string(), ColumnType::Int64),
("b".to_string(), ColumnType::String),
]);
let names = df.get_column_names();
assert_eq!(names, vec!["a", "b"]);
}
#[test]
fn test_dataframe_column_type() {
let df = DataFrame::new(vec![
("id".to_string(), ColumnType::Int64),
("name".to_string(), ColumnType::String),
]);
assert_eq!(df.get_column_type("id"), ColumnType::Int64);
assert_eq!(df.get_column_type("name"), ColumnType::String);
}
#[test]
fn test_dataframe_add_column() {
let mut df = DataFrame::new(vec![("id".to_string(), ColumnType::Int64)]);
let result = df.add_column(
"name".to_string(),
ColumnType::String,
ColumnData::String(vec![]),
);
assert!(result.is_ok());
assert_eq!(df.column_count(), 2);
}
#[test]
fn test_dataframe_add_duplicate_column() {
let mut df = DataFrame::new(vec![("id".to_string(), ColumnType::Int64)]);
let result = df.add_column(
"id".to_string(),
ColumnType::Int64,
ColumnData::Int64(vec![]),
);
assert!(result.is_err());
}
#[test]
fn test_dataframe_add_column_type_mismatch() {
let mut df = DataFrame::new(vec![]);
let result = df.add_column(
"x".to_string(),
ColumnType::Int64,
ColumnData::String(vec![]),
);
assert!(result.is_err());
}
#[test]
fn test_dataframe_get_column_index() {
let df = DataFrame::new(vec![
("a".to_string(), ColumnType::Int64),
("b".to_string(), ColumnType::String),
]);
assert_eq!(df.get_column_index("a"), Some(0));
assert_eq!(df.get_column_index("b"), Some(1));
assert_eq!(df.get_column_index("c"), None);
}
}