use column_store::sorted_df::SortedDataFrame;
use data_value::{DataValue, Extract};
use halfbrown::HashMap;
use ndarray::{Array1, Array2, ArrayView1};
use std::fmt;
pub mod column_store;
pub mod index;
pub mod join;
pub mod key;
use crate::{error::Error, CandidateData};
#[cfg(feature = "python")]
pub mod python;
#[cfg(feature = "python")]
use pyo3::prelude::*;
use crate::{
dataframe::{column_store::ColumnFrame, join::JoinRelation, key::Key},
MLChefMap,
};
/// Which end of a sorted frame to take rows from, and how many.
///
/// In this file it is only passed to `topn` on the value returned by
/// [`DataFrame::sorted`].
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum TopN {
    /// Take the given number of entries from the start of the sorted order.
    First(usize),
    /// Take the given number of entries from the end of the sorted order.
    Last(usize),
}
/// Tabular data (`dataframe`) bundled with two side tables: per-frame
/// `constants` keyed by [`Key`] and free-form `metadata` keyed by `String`.
///
/// All three fields take part in equality and (de)serialization.
#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[cfg_attr(feature = "python", pyclass)]
pub struct DataFrame {
    /// Values stored once per frame; written by `insert_constant` and merged by `join`.
    pub constants: HashMap<Key, DataValue>,
    /// The column storage every data accessor on this type delegates to.
    pub dataframe: ColumnFrame,
    /// Arbitrary named values; written by `add_metadata`, read by `get_metadata`.
    pub metadata: HashMap<String, DataValue>,
}
/// Renders the frame by forwarding to the inner column frame's `fmt`;
/// `constants` and `metadata` are not written.
impl fmt::Display for DataFrame {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE(review): whether this resolves to an inherent `fmt` or a trait
        // method depends on `ColumnFrame`, which is not visible here — confirm
        // before changing the call form.
        self.dataframe.fmt(f)
    }
}
impl DataFrame {
pub fn new<C: Into<ColumnFrame>>(dataframe: C) -> Self {
Self {
constants: HashMap::new(),
dataframe: dataframe.into(),
metadata: HashMap::new(),
}
}
pub fn n_columns(&self) -> usize {
self.dataframe.data_frame.ncols()
}
pub fn n_rows(&self) -> usize {
self.dataframe.data_frame.nrows()
}
pub fn shrink(&mut self) {
self.dataframe.shrink();
}
pub fn add_metadata(&mut self, key: String, value: DataValue) {
self.metadata.insert(key, value);
}
pub fn get_metadata(&self, key: &str) -> Option<&DataValue> {
self.metadata.get(key)
}
pub fn join(&mut self, other: Self, join_type: &JoinRelation) -> Result<(), Error> {
for (key, value) in other.constants {
self.constants.insert(key, value);
}
self.dataframe.join(other.dataframe, join_type)
}
pub fn apply_function<F>(&mut self, keys: &[Key], mut func: F) -> Result<(), Error>
where
F: FnMut(&[Key], &mut ColumnFrame) -> Result<(), Error>,
{
self.dataframe.apply_function(keys, &mut func)
}
pub fn select(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
Ok(self.dataframe.select(keys))
}
pub fn select_typed<T: Extract>(&self, keys: Option<&[Key]>) -> Result<Array2<T>, Error> {
Ok(self.dataframe.select_typed(keys))
}
pub fn select_transposed_typed<D: Extract>(&self, keys: &[Key]) -> Vec<Vec<D>> {
self.dataframe.select_transposed_typed::<D>(keys)
}
pub fn select_column(&self, key: Key) -> Option<ndarray::ArrayView1<'_, DataValue>> {
self.dataframe.select_column(&key)
}
pub fn select_transposed(&self, keys: Option<&[Key]>) -> Result<Array2<DataValue>, Error> {
self.dataframe.select_transposed(keys)
}
pub fn insert_constant(&mut self, key: Key, value: DataValue) {
self.constants.insert(key, value);
}
pub fn push<C: CandidateData>(&mut self, item: C) -> Result<(), Error> {
self.dataframe.push(item)
}
pub fn remove_column(&mut self, keys: &[Key]) -> Result<Self, Error> {
self.dataframe.remove_column(keys).map(|x| x.into())
}
pub fn extend(&mut self, items: Self) -> Result<(), Error> {
self.dataframe.extend(items.dataframe)
}
pub fn len(&self) -> usize {
self.dataframe.len()
}
pub fn is_empty(&self) -> bool {
self.dataframe.is_empty()
}
pub fn add_single_column<K: Into<Key>>(
&mut self,
key: K,
values: Array1<DataValue>,
) -> Result<(), Error> {
self.dataframe.add_single_column(key, values)
}
pub fn get_single_column(&self, key: &Key) -> Option<ArrayView1<'_, DataValue>> {
self.dataframe.get_single_column(key)
}
pub fn get_single_column_typed<T: Extract>(&self, key: &Key) -> Option<Array1<T>> {
self.dataframe.get_single_column_typed(key)
}
pub fn sorted(&self, key: &Key) -> Result<SortedDataFrame<'_>, Error> {
self.dataframe.sorted(key)
}
pub fn filter(&self, filter: &crate::filter::FilterRules) -> Result<Self, Error> {
let filtered_df = self.dataframe.filter(filter)?;
Ok(Self {
constants: self.constants.clone(),
dataframe: filtered_df,
metadata: self.metadata.clone(),
})
}
#[cfg(feature = "polars-df")]
pub fn as_polars(&self) -> Result<polars::prelude::DataFrame, Error> {
let mut columns = vec![];
for key in self.dataframe.keys() {
let values = self
.dataframe
.get_single_column(key)
.ok_or_else(|| Error::NotFound(key.clone()))?
.into_iter()
.map(|x| into_polars_value(key, x.clone()))
.collect::<Vec<_>>();
let s = polars::prelude::Column::new(key.name().into(), values);
columns.push(s);
}
Ok(polars::prelude::DataFrame::new(columns)?)
}
pub fn load_from_messagepack(bytes: &[u8]) -> Result<Self, Error> {
rmp_serde::decode::from_slice(bytes).map_err(|e| Error::UnknownError(format!("{e:?}")))
}
pub fn store_into_messagepack(&self) -> Result<Vec<u8>, Error> {
rmp_serde::encode::to_vec(&self).map_err(|e| Error::UnknownError(format!("{e:?}")))
}
}
/// Maps a crate [`crate::DataType`] onto the corresponding polars `DataType`.
///
/// `Vec` becomes a `List` with an unknown inner type, `Map` becomes a `Struct`
/// with no fields, and `Unknown` becomes `Null`.
#[cfg(feature = "polars-df")]
pub fn polars_dtype(dtype: crate::DataType) -> polars::prelude::DataType {
    // Both enums share variant names (`String`, `Unknown`), so they are
    // addressed through explicit aliases rather than glob imports.
    use crate::DataType as Src;
    use polars::prelude::{DataType as Dst, UnknownKind};

    match dtype {
        Src::Bool => Dst::Boolean,
        // Unsigned integers.
        Src::U8 => Dst::UInt8,
        Src::U32 => Dst::UInt32,
        Src::U64 => Dst::UInt64,
        Src::U128 => Dst::UInt128,
        // Signed integers.
        Src::I32 => Dst::Int32,
        Src::I64 => Dst::Int64,
        Src::I128 => Dst::Int128,
        // Floats.
        Src::F32 => Dst::Float32,
        Src::F64 => Dst::Float64,
        // Text and raw bytes.
        Src::String => Dst::String,
        Src::Bytes => Dst::Binary,
        // Containers and the "no type known" case.
        Src::Unknown => Dst::Null,
        Src::Vec => Dst::List(Box::new(Dst::Unknown(UnknownKind::Any))),
        Src::Map => Dst::Struct(vec![]),
    }
}
/// Converts one [`DataValue`] into an owned polars `AnyValue`.
///
/// The value is first passed through `convert_dv_to_dtype` with `key`, then
/// mapped variant by variant. Nested values recurse:
/// * `Vec` becomes a `List` series named after `key`; the element key uses the
///   first non-`Unknown` dtype detected among the elements.
/// * `Map` becomes a `StructOwned` whose fields are ordered by key.
///
/// # Panics
/// Panics if polars cannot build a series from the elements of a `Vec` value.
#[cfg(feature = "polars-df")]
pub fn into_polars_value(key: &Key, dv: DataValue) -> polars::prelude::AnyValue<'static> {
    use crate::dataframe::column_store::convert_dv_to_dtype;
    use polars::prelude::AnyValue::*;
    use polars::prelude::Field;

    let dv = convert_dv_to_dtype(key, dv);
    match dv {
        DataValue::String(smart_string) => StringOwned(smart_string.as_str().into()),
        DataValue::Bytes(items) => BinaryOwned(items),
        // NOTE(review): `U8` is widened to `UInt32` here although
        // `polars_dtype` maps `U8` to `UInt8` — presumably deliberate; confirm.
        DataValue::U8(x) => UInt32(x as _),
        DataValue::Bool(x) => Boolean(x),
        DataValue::I32(x) => Int32(x),
        DataValue::U32(x) => UInt32(x),
        DataValue::I64(x) => Int64(x),
        DataValue::U64(x) => UInt64(x),
        DataValue::I128(x) => Int128(x),
        DataValue::F32(x) => Float32(x),
        DataValue::F64(x) => Float64(x),
        DataValue::Null => Null,
        DataValue::Vec(data_values) => {
            // Element dtype: the first one that is not `Unknown`, if any.
            let dt = data_values
                .iter()
                .map(|d| crate::detect_dtype(d))
                .find(|dt| !matches!(dt, crate::DataType::Unknown))
                .unwrap_or(crate::DataType::Unknown);
            let vec_key = Key::new(key.name(), dt);
            let elements = data_values
                .into_iter()
                .map(|x| into_polars_value(&vec_key, x))
                .collect::<Vec<_>>();
            let s = polars::series::Series::from_any_values(key.name().into(), &elements, true);
            // Build the panic message only on failure; `expect(&format!(..))`
            // would format it on every successful conversion as well.
            List(s.unwrap_or_else(|e| panic!("Cannot create series for {key:?}: {e:?}")))
        }
        DataValue::EnumNumber(x) => Int32(x),
        DataValue::U128(x) => UInt128(x),
        DataValue::Map(x) => {
            // Consume the map and order the entries by key so the struct
            // layout is deterministic; this avoids a second lookup per key
            // and a clone per value.
            let mut entries = x.into_iter().collect::<Vec<_>>();
            entries.sort_unstable_by(|a, b| a.0.cmp(&b.0));
            let mut values = Vec::with_capacity(entries.len());
            let mut fields = Vec::with_capacity(entries.len());
            for (name, value) in entries {
                let dtype = crate::detect_dtype(&value);
                let k = Key::new(&name, dtype);
                values.push(into_polars_value(&k, value));
                fields.push(Field::new(k.name().into(), polars_dtype(dtype)));
            }
            StructOwned(Box::new((values, fields)))
        }
    }
}
/// Converts a polars `AnyValue` into a [`DataValue`].
///
/// Narrow integers are widened losslessly (`UInt16` to `U32`, `Int8`/`Int16`
/// to `I32`). Lists and owned structs are converted recursively. Any variant
/// without an arm is logged with `tracing::warn!` and mapped to
/// `DataValue::Null`.
#[cfg(feature = "polars-df")]
pub fn from_polars_value(dv: polars::prelude::AnyValue<'_>) -> DataValue {
    use polars::prelude::AnyValue::*;
    match dv {
        Null => DataValue::Null,
        Boolean(v) => v.into(),
        String(v) => DataValue::String(v.into()),
        UInt8(v) => DataValue::U8(v),
        UInt16(v) => DataValue::U32(u32::from(v)),
        UInt32(v) => v.into(),
        UInt64(v) => v.into(),
        // `into_polars_value` emits `UInt128` for `DataValue::U128`; without
        // this arm such values came back as `Null`.
        UInt128(v) => DataValue::U128(v),
        Int8(v) => i32::from(v).into(),
        Int16(v) => i32::from(v).into(),
        Int32(v) => v.into(),
        Int64(v) => v.into(),
        Float32(v) => v.into(),
        Float64(v) => v.into(),
        Int128(v) => v.into(),
        List(series) => DataValue::Vec(series.iter().map(from_polars_value).collect::<Vec<_>>()),
        StringOwned(v) => DataValue::String(v.as_str().into()),
        Binary(v) => DataValue::Bytes(v.to_owned()),
        BinaryOwned(v) => DataValue::Bytes(v),
        StructOwned(m) => {
            // The box holds (values, fields) in matching order.
            let (values, fields) = *m;
            let hm: std::collections::HashMap<smartstring::alias::String, DataValue> = fields
                .into_iter()
                .zip(values)
                .map(|(field, value)| (field.name.as_str().into(), from_polars_value(value)))
                .collect();
            DataValue::Map(hm)
        }
        e => {
            tracing::warn!("Unsupported polars value: {e:?}");
            DataValue::Null
        }
    }
}
impl From<ColumnFrame> for DataFrame {
fn from(dataframe: ColumnFrame) -> Self {
Self::new(dataframe)
}
}
/// Builds a frame from a list of `std` hash maps, going through `ColumnFrame::from`.
impl From<Vec<std::collections::HashMap<Key, DataValue>>> for DataFrame {
    fn from(rows: Vec<std::collections::HashMap<Key, DataValue>>) -> Self {
        let columns = ColumnFrame::from(rows);
        Self::new(columns)
    }
}
/// Builds a frame from a list of `halfbrown` hash maps, going through `ColumnFrame::from`.
impl From<Vec<HashMap<Key, DataValue>>> for DataFrame {
    fn from(rows: Vec<HashMap<Key, DataValue>>) -> Self {
        let columns = ColumnFrame::from(rows);
        Self::new(columns)
    }
}
/// Builds a frame from a name-to-values map, going through `ColumnFrame::from`.
impl From<std::collections::HashMap<String, Vec<DataValue>>> for DataFrame {
    fn from(named_columns: std::collections::HashMap<String, Vec<DataValue>>) -> Self {
        let columns = ColumnFrame::from(named_columns);
        Self::new(columns)
    }
}
/// Builds a frame from an [`MLChefMap`], going through `ColumnFrame::from`.
impl From<MLChefMap> for DataFrame {
    fn from(map: MLChefMap) -> Self {
        let columns = ColumnFrame::from(map);
        Self::new(columns)
    }
}
/// Builds a frame from `(key, values)` pairs, going through `ColumnFrame::from`.
impl From<Vec<(Key, Vec<DataValue>)>> for DataFrame {
    fn from(keyed_columns: Vec<(Key, Vec<DataValue>)>) -> Self {
        let columns = ColumnFrame::from(keyed_columns);
        Self::new(columns)
    }
}
/// Builds a frame from a name-to-array map, going through `ColumnFrame::from`.
impl From<std::collections::HashMap<String, Array1<DataValue>>> for DataFrame {
    fn from(named_arrays: std::collections::HashMap<String, Array1<DataValue>>) -> Self {
        let columns = ColumnFrame::from(named_arrays);
        Self::new(columns)
    }
}
/// Builds a frame from a polars `DataFrame`, going through `ColumnFrame::from`.
#[cfg(feature = "polars-df")]
impl From<polars::prelude::DataFrame> for DataFrame {
    fn from(polars_frame: polars::prelude::DataFrame) -> Self {
        let columns = ColumnFrame::from(polars_frame);
        Self::new(columns)
    }
}
#[cfg(test)]
mod test {
use crate::filter::FilterRules;
use super::*;
use halfbrown::hashmap;
#[cfg(feature = "polars-df")]
use polars::prelude::NamedFrom as _;
use rstest::*;
use tracing_test::traced_test;
#[fixture]
fn dummy_candidates() -> ColumnFrame {
ColumnFrame::from(vec![
hashmap! {
"key1".into() => 1.into(),
"key2".into() => "a".into(),
},
hashmap! {
"key1".into() => 2.into(),
"key2".into() => "b".into(),
},
])
}
#[rstest]
fn test_serde() {
let df = crate::df! {
"a" => [1u64, 2u64, 3u64],
"b" => [4u64, 5u64, 6u64],
"c" => [7u64, 8u64, 9u64]
};
let serialized = serde_json::to_string(&df).expect("BUG: Unable to serialize dataframe");
let deserialized =
serde_json::from_str(&serialized).expect("BUG: Unable to deserialize dataframe");
assert_eq!(df, deserialized);
}
// A polars frame converted into a `DataFrame` selects the same cells as a
// natively built frame with the same contents.
#[cfg(feature = "polars-df")]
#[rstest]
fn test_polars() {
    let expected = crate::df! {
        "a" => [1u64, 2u64, 3u64],
        "b" => [4f64, 5f64, 6f64],
        "c" => [7i64, 8i64, 9i64]
    };
    let polars_df = polars::df!(
        "a" => [1u64, 2u64, 3u64],
        "b" => [4f64, 5f64, 6f64],
        "c" => [7i64, 8i64, 9i64]
    )
    .expect("BUG: should be ok");
    let converted = DataFrame::from(polars_df);

    let keys: [Key; 3] = ["a".into(), "b".into(), "c".into()];
    let columns = Some(keys.as_slice());
    assert_eq!(converted.select(columns), expected.select(columns));
}
#[cfg(feature = "polars-df")]
use crate::DataType;
// Each case converts a `DataValue` to polars and back and checks both directions.
// NOTE(review): several expected values differ in variant from what
// `into_polars_value` builds (`String` vs `StringOwned`, `UInt8` vs `UInt32`,
// a list series named "v"), so these cases presumably rely on polars'
// `AnyValue` equality treating them as equal — confirm.
#[cfg(feature = "polars-df")]
#[rstest]
#[case::str(Key::new("a", DataType::String), DataValue::String("test".into()), polars::prelude::AnyValue::String("test".into()))]
#[case::u32(
    Key::new("a", DataType::U32),
    DataValue::U32(u32::MAX),
    polars::prelude::AnyValue::UInt32(u32::MAX)
)]
#[case::i32(
    Key::new("a", DataType::I32),
    DataValue::I32(i32::MIN),
    polars::prelude::AnyValue::Int32(i32::MIN)
)]
#[case::i64(
    Key::new("a", DataType::I64),
    DataValue::I64(i64::MIN),
    polars::prelude::AnyValue::Int64(i64::MIN)
)]
#[case::u64(
    Key::new("a", DataType::U64),
    DataValue::U64(u64::MIN),
    polars::prelude::AnyValue::UInt64(u64::MIN)
)]
#[case::f32(
    Key::new("a", DataType::F32),
    DataValue::F32(f32::MIN),
    polars::prelude::AnyValue::Float32(f32::MIN)
)]
#[case::f64(
    Key::new("a", DataType::F64),
    DataValue::F64(f64::MIN),
    polars::prelude::AnyValue::Float64(f64::MIN)
)]
#[case::null(
    Key::new("a", DataType::Unknown),
    DataValue::Null,
    polars::prelude::AnyValue::Null
)]
#[case::i128(
    Key::new("a", DataType::I128),
    DataValue::I128(i128::MIN),
    polars::prelude::AnyValue::Int128(i128::MIN)
)]
#[case::u8(
    Key::new("a", DataType::U8),
    DataValue::U8(255),
    polars::prelude::AnyValue::UInt8(255)
)]
#[case::bool(
    Key::new("a", DataType::Bool),
    DataValue::Bool(true),
    polars::prelude::AnyValue::Boolean(true)
)]
#[case::bytes(Key::new("a", DataType::Bytes), DataValue::Bytes("aaaaa".as_bytes().to_vec()), polars::prelude::AnyValue::BinaryOwned("aaaaa".as_bytes().to_vec()))]
#[case::vec_uints(Key::new("a", DataType::Vec), DataValue::Vec(vec![DataValue::U32(0), DataValue::U32(1)]), polars::prelude::AnyValue::List(polars::series::Series::new("v".into(), vec![polars::prelude::AnyValue::UInt32(0u32), polars::prelude::AnyValue::UInt32(1)])))]
#[case::map(Key::new("a", DataType::Map), DataValue::Map(data_value::stdhashmap!("a" => 0u64, "b" => "s")), polars::prelude::AnyValue::StructOwned(Box::new((
    vec![polars::prelude::AnyValue::UInt64(0u64), polars::prelude::AnyValue::String("s".into())],
    vec![polars::prelude::Field::new("a".into(), polars::prelude::DataType::UInt64), polars::prelude::Field::new("b".into(), polars::prelude::DataType::String)]))))]
fn into_polars_value_test(
    #[case] key: Key,
    #[case] input: DataValue,
    #[case] output: polars::prelude::AnyValue<'static>,
) {
    // Forward direction: crate value -> polars value.
    let converted = into_polars_value(&key, input.clone());
    assert_eq!(converted, output);
    // Backward direction: polars value -> crate value.
    let restored = from_polars_value(output);
    assert_eq!(restored, input);
}
// Applying the parsed filter rules to `df` must yield exactly `expected`.
#[rstest]
#[case(
    DataFrame::new(crate::column_frame! {
        "a" => [1f64, 2f64, 3f64],
        "b" => [4i64, 5i64, 6i64],
        "c" => [7i64, 8i64, 9i64]
    }),
    DataFrame::new(crate::column_frame! {
        "a" => [1f64, 2f64],
        "b" => [4i64, 5i64],
        "c" => [7i64, 8i64]
    }),
    FilterRules::try_from("a >= 1f64 && (b <= 5 || c <= 8) && b >= 4").expect("BUG: cannot create filter rules"),
)]
#[case(
    DataFrame::new(crate::column_frame! {
        "a" => [1f64, 2f64, 3f64],
        "b" => [4i64, 5i64, 6i64],
        "c" => [7i64, 8i64, 9i64]
    }),
    DataFrame::new(crate::column_frame! {
        "a" => [2f64],
        "b" => [5i64],
        "c" => [8i64]
    }),
    FilterRules::try_from("a % 2f64 == 0f64").expect("BUG: cannot create filter rules"),
)]
#[traced_test]
fn filter_test(
    #[case] df: DataFrame,
    #[case] expected: DataFrame,
    #[case] filter: FilterRules,
) {
    let outcome = df.filter(&filter);
    let kept_rows = outcome.expect("BUG: cannot filter");
    assert_eq!(kept_rows, expected);
}
// Deserializes a hand-written JSON frame on its own and as three elements of a
// JSON array; the first array element must equal the standalone result.
#[rstest]
fn test_serde_complex() {
    let document = r#"
{
"constants": {},
"dataframe": {
"index": {
"keys": [
{
"key": 3162770485,
"name": "a",
"ctype": "U32"
},
{
"key": 2279056742,
"name": "b",
"ctype": "F64"
},
{
"key": 2994984227,
"name": "c",
"ctype": "U64"
},
{
"key": 3319645144,
"name": "d",
"ctype": "F64"
},
{
"key": 1291847470,
"name": "e",
"ctype": "U32"
},
{
"key": 874241070,
"name": "f",
"ctype": "Bool"
}
],
"indexes": {
"a": 0,
"b": 1,
"c": 2,
"d": 3,
"e": 4,
"f": 5
},
"alias": {}
},
"data_frame": {
"v": 1,
"dim": [
2,
6
],
"data": [
253780,
0.009369421750307085,
1633222860381359,
8,
5,
true,
64512,
0.003391335718333721,
1633222860810557,
8,
5,
null
]
}
},
"metadata": {}
}
"#;
    let single: DataFrame =
        serde_json::from_str(document).expect("BUG: Unable to deserialize dataframe");
    println!("deserialized: {:?}", single);

    // The same document three times inside a JSON array.
    let array = format!("[{document}, {document}, {document}]");
    let many: Vec<DataFrame> =
        serde_json::from_str(&array).expect("BUG: Unable to deserialize dataframe");
    println!("deserialized: {:?}", many);

    assert_eq!(many.len(), 3);
    assert_eq!(single, many[0]);
}
// Every supported input shape must convert into the same two-row frame, and
// `select_column` must return the `key1` column of it.
#[rstest]
#[case(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
#[case(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
#[case(vec![hashmap! {
    "key1".into() => 1.into(),
    "key2".into() => "a".into(),
},
hashmap! {
    "key1".into() => 2.into(),
},])]
#[case(vec![data_value::stdhashmap! {
    "key1" => DataValue::from(1),
    "key2" => DataValue::from("a"),
},data_value::stdhashmap! {
    "key1" => DataValue::from(2),
},])]
#[case(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(),
vec![DataValue::from("a"), DataValue::Null])])]
fn test_select_column<T: Into<DataFrame>>(#[case] input: T) {
    let df: DataFrame = input.into();
    let reference = DataFrame {
        constants: HashMap::new(),
        dataframe: ColumnFrame::from(vec![
            hashmap! {
                "key1".into() => 1.into(),
                "key2".into() => "a".into(),
            },
            hashmap! {
                "key1".into() => 2.into(),
            },
        ]),
        metadata: HashMap::new(),
    };
    assert_eq!(df, reference);

    let column = df.select_column("key1".into());
    assert!(column.is_some());
    let column = column.unwrap();
    assert_eq!(column.len(), 2);
    assert_eq!(column, ndarray::array![1.into(), 2.into()]);
}
#[rstest]
#[case::hhm(hashmap!("key1".into() => vec![1.into(), 2.into()], "key2".into() => vec!["a".into()]))]
#[case::stdhm(data_value::stdhashmap!("key1" => vec![1, 2], "key2" => vec!["a"]))]
#[case::hm({
let hm: std::collections::HashMap<String, Array1<DataValue>> = data_value::stdhashmap!("key1".to_string() => Array1::from_vec(vec![DataValue::from(1), DataValue::from(2)]), "key2".to_string() => Array1::from_vec(vec![DataValue::from("a"), DataValue::Null]));
hm
})]
#[case::vec_hhm(vec![hashmap! {
"key1".into() => 1.into(),
"key2".into() => "a".into(),
},
hashmap! {
"key1".into() => 2.into(),
},])]
#[case::vec_hme(vec![data_value::stdhashmap! {
"key1" => DataValue::from(1),
"key2" => DataValue::from("a"),
},data_value::stdhashmap! {
"key1" => DataValue::from(2),
},])]
#[case::vec_vec(vec![("key1".into(), vec! [DataValue::from(1), DataValue::from(2)]), ("key2".into(), vec![DataValue::from("a"), DataValue::Null])])]
fn test_from_conversion<T: Into<DataFrame>>(#[case] input: T) {
let df: DataFrame = input.into();
let expected: DataFrame = DataFrame {
constants: HashMap::new(),
dataframe: ColumnFrame::from(vec![
hashmap! {
"key1".into() => 1.into(),
"key2".into() => "a".into(),
},
hashmap! {
"key1".into() => 2.into(),
},
]),
metadata: HashMap::new(),
};
assert_eq!(
df.select(Some(&["key1".into(), "key2".into()])),
expected.select(Some(&["key1".into(), "key2".into()])),
"{df} vs {expected}"
);
let selected_transposed = df.select_transposed_typed::<i32>(&["key1".into()]);
assert_eq!(selected_transposed.len(), 2);
println!("{:?}", selected_transposed);
assert_eq!(selected_transposed, vec![vec![1], vec![2]]);
}
#[rstest]
fn test_dataframe(dummy_candidates: ColumnFrame) {
let mut dataframe: DataFrame = DataFrame::default();
assert!(dataframe.is_empty());
assert!(dataframe.extend(dummy_candidates.into()).is_ok());
assert_eq!(dataframe.len(), 2);
let candidate = hashmap! {
"key1".into() => 3.into(),
"key2".into() => "c".into(),
};
assert!(dataframe.push(candidate).is_ok());
assert_eq!(dataframe.len(), 3);
assert!(!dataframe.is_empty());
dataframe.insert_constant("key3".into(), 4.into());
assert_eq!(dataframe.constants.len(), 1);
assert!(dataframe
.apply_function(&["key1".into()], |keys, df| {
let key = keys[0].clone();
let s = df
.get_single_column(&key)
.expect("BUG: Cannot get column")
.to_owned();
let s = s.mapv(|x| x + DataValue::from(1));
df.add_single_column("key5", s)?;
Ok(())
})
.is_ok());
let original = dataframe.clone();
dataframe.shrink();
let remove_df = dataframe.remove_column(&["key1".into()]);
assert!(remove_df.is_ok());
let mut remove_df = remove_df.unwrap();
assert_eq!(remove_df.len(), 3);
let selected = dataframe.select(Some(&["key2".into()]));
assert!(selected.is_ok());
let selected = selected.unwrap();
println!("{:?}", selected);
let joined_result =
remove_df.join(dataframe, &JoinRelation::new(crate::JoinBy::AddColumns));
assert!(joined_result.is_ok(), "{:?}", joined_result);
let keys = vec!["key1".into(), "key2".into(), "key5".into()];
assert_eq!(
original.select(Some(keys.as_slice())),
remove_df.select(Some(keys.as_slice()))
);
}
#[rstest]
fn test_size_methods() {
let candidate = hashmap! {
"key1".into() => 3.into(),
"key2".into() => "c".into(),
"key3".into() => false.into()
};
let dataframe: DataFrame = vec![candidate].into();
assert_eq!(dataframe.n_columns(), 3);
assert_eq!(dataframe.n_rows(), 1);
}
#[rstest]
fn test_metadata(dummy_candidates: ColumnFrame) {
let mut dataframe: DataFrame = DataFrame::default();
assert!(dataframe.is_empty());
println!("{:?}", dataframe);
assert!(dataframe.extend(dummy_candidates.into()).is_ok());
println!("{:?}", dataframe);
assert_eq!(dataframe.len(), 2);
dataframe.add_metadata("test".into(), 1.into());
assert_eq!(dataframe.get_metadata("test"), Some(&1.into()));
let dataframe = DataFrame::new(ColumnFrame::from(vec![
hashmap! {
"key1".into() => 1.into(),
"key2".into() => "a".into(),
},
hashmap! {
"key1".into() => 2.into(),
"key2".into() => "b".into(),
},
]));
assert_eq!(dataframe.get_metadata("test"), None);
let tt = dataframe.select_transposed(None);
assert!(tt.is_ok());
let tt = tt.unwrap();
assert_eq!(tt.shape(), [2, 2]);
assert_eq!(
tt,
Array2::from_shape_vec((2, 2), vec![1.into(), 2.into(), "a".into(), "b".into()])
.unwrap()
);
}
#[rstest]
#[traced_test]
fn add_single_column_test() {
let mut dataframe = DataFrame::default();
let values = Array1::from(vec![1.into(), 2.into(), 3.into()]);
let r = dataframe.add_single_column("key1", values);
assert!(r.is_ok(), "{r:?}");
let selected = dataframe.select(None);
assert!(selected.is_ok());
let selected = selected.unwrap();
assert_eq!(selected.shape(), [3, 1]);
assert_eq!(
selected,
Array2::from_shape_vec((3, 1), vec![1.into(), 2.into(), 3.into()]).unwrap()
);
let values = Array1::from(vec![1.into(), 2.into()]);
assert!(dataframe.add_single_column("key1", values).is_err());
let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
assert!(dataframe.add_single_column("key2", values).is_ok());
let values = Array1::from(vec![3.into()]);
assert!(dataframe.add_single_column("key3", values).is_err());
}
#[rstest]
#[traced_test]
fn add_single_column_empty_test() {
let mut dataframe = DataFrame::default();
let values = Array1::from(vec![]);
let r = dataframe.add_single_column("key1", values);
assert!(r.is_ok(), "{r:?}");
let selected = dataframe.select(None);
assert!(selected.is_ok());
let selected = selected.unwrap();
assert_eq!(selected.shape(), [0, 1]);
assert_eq!(selected, Array2::from_shape_vec((0, 1), vec![]).unwrap());
let values = Array1::from(vec![1.into(), 2.into()]);
assert!(dataframe.add_single_column("key1", values).is_err());
let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
assert!(dataframe.add_single_column("key2", values).is_ok());
let values = Array1::from(vec![3.into(), 4.into()]);
assert!(dataframe.add_single_column("key3", values).is_err());
let values = Array1::from(vec![3.into(), 4.into(), 5.into()]);
assert!(dataframe.add_single_column("key3", values).is_ok());
assert_eq!(
dataframe
.select_column("key1".into())
.expect("BUG: has to exists"),
ndarray::arr1(&[DataValue::Null, DataValue::Null, DataValue::Null]),
);
assert_eq!(
dataframe
.select_column("key2".into())
.expect("BUG: has to exists"),
ndarray::arr1(&[3.into(), 4.into(), 5.into()]),
);
assert_eq!(
dataframe.select(None).expect("BUG: cannot get data"),
ndarray::arr2(&[
[DataValue::Null, 3.into(), 3.into()],
[DataValue::Null, 4.into(), 4.into()],
[DataValue::Null, 5.into(), 5.into()],
])
);
}
// Selecting several columns returns them in the requested order; cells that
// are missing (absent in a row, or an unknown column) are expected as nulls.
#[rstest]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() => 2.into(),
            "k3".into() => 2.2.into(),
        },
        hashmap! {
            "k".into() => 11.into(),
            "k2".into() => 3.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 5.into(),
            "k3".into() => 2.3.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 5.into(),
            "k3".into() => 2.4.into(),
        },
    ])),
    vec!["k".into(), "k2".into()],
    Array2::from_shape_vec((4, 2), vec![1.into(), 2.into(), 11.into(), 3.into(), 4.into(), 5.into(), 4.into(), 5.into()]).unwrap()
)]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() => 2.into(),
            "k3".into() => 2.2.into(),
        },
        hashmap! {
            "k".into() => 11.into(),
            "k2".into() => 3.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 5.into(),
            "k3".into() => 2.3.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 5.into(),
            "k3".into() => 2.4.into(),
        },
    ])),
    vec!["k2".into(), "k3".into(), "nonexist1".into(), "nonexists2".into(), "k".into()],
    Array2::from_shape_vec((4, 5), vec![
        2.into(), 2.2.into(), DataValue::Null, DataValue::Null, 1.into(),
        3.into(), DataValue::Null, DataValue::Null, DataValue::Null, 11.into(),
        5.into(), 2.3.into(), DataValue::Null, DataValue::Null, 4.into(),
        5.into(), 2.4.into(), DataValue::Null, DataValue::Null, 4.into()]).unwrap()
)]
#[traced_test]
fn select_multiple(
    #[case] input: DataFrame,
    #[case] columns: Vec<Key>,
    #[case] expected: Array2<DataValue>,
) {
    let outcome = input.select(Some(columns.as_slice()));
    assert!(outcome.is_ok());
    let cells = outcome.unwrap();
    assert_eq!(cells, expected);
}
// Sorting by `column` and selecting `columns` from the sorted frame must give
// `expected`. The cases cover an integer key, a float key with a missing cell
// (expected first), small floats and strings.
#[rstest]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() => 2.into(),
            "k3".into() => 2.2.into(),
        },
        hashmap! {
            "k".into() => 11.into(),
            "k2".into() => 3.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 5.into(),
            "k3".into() => 2.3.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 5.into(),
            "k3".into() => 2.4.into(),
        },
    ])),
    "k".into(),
    Array2::from_shape_vec((4, 3), vec![
        1.into(), 2.into(), 2.2.into(),
        4.into(), 5.into(), 2.3.into(),
        4.into(), 5.into(), 2.4.into(),
        11.into(), 3.into(), DataValue::Null,
        ]
    ).unwrap(),
    vec!["k".into(), "k2".into(), "k3".into()],
)]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() => 2.into(),
            "k3".into() => 2.2.into(),
        },
        hashmap! {
            "k".into() => 11.into(),
            "k2".into() => 3.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 5.into(),
            "k3".into() => 2.3.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 5.into(),
            "k3".into() => 2.4.into(),
        },
    ])),
    "k3".into(),
    Array2::from_shape_vec((4, 3), vec![
        11.into(), 3.into(), DataValue::Null,
        1.into(), 2.into(), 2.2.into(),
        4.into(), 5.into(), 2.3.into(),
        4.into(), 5.into(), 2.4.into(),
        ]
    ).unwrap(),
    vec!["k".into(), "k2".into(), "k3".into()],
)]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 2.into(),
            "k2".into() => 0.000001.into(),
        },
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() => 0.0000001.into(),
        },
        hashmap! {
            "k".into() => 3.into(),
            "k2".into() => 0.00001.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 0.001.into(),
        },
    ])),
    "k2".into(),
    Array2::from_shape_vec((4, 2), vec![
        1.into(), 0.0000001.into(),
        2.into(), 0.000001.into(),
        3.into(), 0.00001.into(),
        4.into(), 0.001.into(),
        ]
    ).unwrap(),
    vec!["k".into(), "k2".into()],
)]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 2.into(),
            "k2".into() => "b".into(),
        },
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() => "a".into(),
        },
        hashmap! {
            "k".into() => 3.into(),
            "k2".into() => "c".into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => "z".into(),
        },
    ])),
    "k2".into(),
    Array2::from_shape_vec((4, 2), vec![
        1.into(), "a".into(),
        2.into(), "b".into(),
        3.into(), "c".into(),
        4.into(), "z".into(),
        ]
    ).unwrap(),
    vec!["k".into(), "k2".into()],
)]
#[traced_test]
fn sort_by(
    #[case] input: DataFrame,
    #[case] column: Key,
    #[case] expected: Array2<DataValue>,
    #[case] columns: Vec<Key>,
) {
    let result = input.sorted(&column);
    assert!(result.is_ok(), "{result:?}");
    let result = result.unwrap().get_sorted();
    let selected = result.select(Some(&columns));
    assert_eq!(selected, expected);
}
// After sorting by `column`, `topn` must return the requested number of rows
// from the requested end; the `Last` cases expect the largest value first.
#[rstest]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 2.into(),
            "k2".into() => 0.000001.into(),
        },
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() =>0.0000001.into(),
        },
        hashmap! {
            "k".into() => 3.into(),
            "k2".into() => 0.00001.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 0.001.into(),
        },
    ])),
    "k2".into(),
    TopN::Last(1),
    Array2::from_shape_vec((1, 2), vec![
        4.into(), 0.001.into(),
        ]
    ).unwrap(),
    vec!["k".into(), "k2".into()],
)]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 2.into(),
            "k2".into() => 0.000001.into(),
        },
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() =>0.0000001.into(),
        },
        hashmap! {
            "k".into() => 3.into(),
            "k2".into() => 0.00001.into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() => 0.001.into(),
        },
    ])),
    "k2".into(),
    TopN::Last(2),
    Array2::from_shape_vec((2, 2), vec![
        4.into(), 0.001.into(),
        3.into(), 0.00001.into(),
        ]
    ).unwrap(),
    vec!["k".into(), "k2".into()],
)]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 2.into(),
            "k2".into() => "b".into(),
        },
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() =>"a".into(),
        },
        hashmap! {
            "k".into() => 3.into(),
            "k2".into() =>"c".into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() =>"z".into(),
        },
    ])),
    "k2".into(),
    TopN::First(1),
    Array2::from_shape_vec((1, 2), vec![
        1.into(),"a".into(),
        ]
    ).unwrap(),
    vec!["k".into(), "k2".into()],
)]
#[case(
    DataFrame::new(ColumnFrame::from(vec![
        hashmap! {
            "k".into() => 2.into(),
            "k2".into() => "b".into(),
        },
        hashmap! {
            "k".into() => 1.into(),
            "k2".into() =>"a".into(),
        },
        hashmap! {
            "k".into() => 3.into(),
            "k2".into() =>"c".into(),
        },
        hashmap! {
            "k".into() => 4.into(),
            "k2".into() =>"z".into(),
        },
    ])),
    "k2".into(),
    TopN::First(2),
    Array2::from_shape_vec((2, 2), vec![
        1.into(),"a".into(),
        2.into(),"b".into(),
        ]
    ).unwrap(),
    vec!["k".into(), "k2".into()],
)]
#[traced_test]
fn top_n(
    #[case] input: DataFrame,
    #[case] column: Key,
    #[case] topn: TopN,
    #[case] expected: Array2<DataValue>,
    #[case] columns: Vec<Key>,
) {
    let sorted = input.sorted(&column);
    assert!(sorted.is_ok(), "{sorted:?}");
    let sorted = sorted.unwrap();
    let picked = sorted.topn(topn).unwrap();
    let cells = picked.select(Some(columns.as_slice()));
    assert_eq!(cells, expected);
}
// A default (empty) frame survives a MessagePack round trip unchanged.
#[rstest]
fn test_messagepack_roundtrip_empty_dataframe() {
    let original = DataFrame::default();
    let encoded = original
        .store_into_messagepack()
        .expect("failed to serialize empty df");
    let decoded =
        DataFrame::load_from_messagepack(&encoded).expect("failed to deserialize empty df");
    assert_eq!(original, decoded);
    assert!(decoded.is_empty());
}
#[rstest]
fn test_messagepack_roundtrip_strings_and_bools() {
let df = DataFrame::new(ColumnFrame::from(vec![
hashmap! {
"str".into() => DataValue::String("hello".into()),
"bool".into() => DataValue::Bool(true),
},
hashmap! {
"str".into() => DataValue::String("".into()),
"bool".into() => DataValue::Bool(false),
},
]));
let bytes = df.store_into_messagepack().expect("failed to serialize");
let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
assert_eq!(df, restored);
}
#[rstest]
fn test_messagepack_roundtrip_f64_values() {
let df = DataFrame::new(ColumnFrame::from(vec![
hashmap! {
"a".into() => DataValue::F64(3.14),
},
hashmap! {
"a".into() => DataValue::F64(-2.718),
},
]));
let bytes = df.store_into_messagepack().expect("failed to serialize");
let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
assert_eq!(df, restored);
}
#[rstest]
fn test_messagepack_f64_special_values_survive_roundtrip() {
let df = DataFrame::new(ColumnFrame::from(vec![hashmap! {
"a".into() => DataValue::F64(f64::INFINITY),
}]));
let bytes = df.store_into_messagepack().expect("failed to serialize");
let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
assert_eq!(restored.len(), 1);
let col = restored.select_column("a".into()).expect("col exists");
match &col[0] {
DataValue::F64(v) => assert!(v.is_infinite() && v.is_sign_positive()),
other => panic!("expected F64, got {other:?}"),
}
}
#[rstest]
fn test_messagepack_roundtrip_with_nulls() {
let df = DataFrame::new(ColumnFrame::from(vec![
hashmap! {
"a".into() => DataValue::String("x".into()),
"b".into() => DataValue::String("y".into()),
},
hashmap! {
"a".into() => DataValue::String("z".into()),
},
]));
let bytes = df.store_into_messagepack().expect("failed to serialize");
let restored = DataFrame::load_from_messagepack(&bytes).expect("failed to deserialize");
assert_eq!(df, restored);
}
// Metadata entries are part of the MessagePack payload and are restored.
#[rstest]
fn test_messagepack_roundtrip_with_metadata() {
    let mut original = DataFrame::new(crate::column_frame! {
        "col" => ["a", "b"]
    });
    original.add_metadata("name".into(), DataValue::String("test_df".into()));
    original.add_metadata("flag".into(), DataValue::Bool(true));

    let encoded = original.store_into_messagepack().expect("failed to serialize");
    let decoded = DataFrame::load_from_messagepack(&encoded).expect("failed to deserialize");

    assert_eq!(original, decoded);
    let expected_name = DataValue::String("test_df".into());
    assert_eq!(decoded.get_metadata("name"), Some(&expected_name));
    let expected_flag = DataValue::Bool(true);
    assert_eq!(decoded.get_metadata("flag"), Some(&expected_flag));
}
/// Inserts two constants into a frame, round-trips it through MessagePack,
/// and checks whole-frame equality plus the `"const_key"` constant lookup.
#[rstest]
fn test_messagepack_roundtrip_with_constants() {
    let expected_const = DataValue::String("const_val".into());

    let mut original = DataFrame::new(crate::column_frame! {
        "x" => ["a", "b"]
    });
    original.insert_constant("const_key".into(), expected_const.clone());
    original.insert_constant("const_flag".into(), DataValue::Bool(false));

    let encoded = original
        .store_into_messagepack()
        .expect("failed to serialize");
    let decoded = DataFrame::load_from_messagepack(&encoded).expect("failed to deserialize");

    assert_eq!(original, decoded);
    assert_eq!(
        decoded.constants.get(&"const_key".into()),
        Some(&expected_const)
    );
}
/// Pins the current behaviour that a small `i64` value does not come back as
/// `DataValue::I64` after a MessagePack round-trip, while the row count is
/// preserved.
#[rstest]
fn test_messagepack_integer_type_coercion() {
    let original = crate::df! {
        "a" => [1i64, 2i64, 3i64]
    };
    let encoded = original
        .store_into_messagepack()
        .expect("failed to serialize");
    let decoded = DataFrame::load_from_messagepack(&encoded).expect("failed to deserialize");
    assert_eq!(decoded.len(), 3);

    let column = decoded
        .select_column("a".into())
        .expect("column should exist");
    let unexpected = DataValue::I64(1);
    assert_ne!(
        column[0], unexpected,
        "messagepack coerces small ints to compact types"
    );
}
/// Round-trips a frame holding `i64::MIN` and expects the restored frame to
/// equal the original.
#[rstest]
fn test_messagepack_large_i64_preserved() {
    let frame = ColumnFrame::from(vec![hashmap! {
        "big".into() => DataValue::I64(i64::MIN),
    }]);
    let original = DataFrame::new(frame);
    let encoded = original
        .store_into_messagepack()
        .expect("failed to serialize");
    let decoded = DataFrame::load_from_messagepack(&encoded).expect("failed to deserialize");
    assert_eq!(original, decoded);
}
/// Loading a four-byte payload that is not a serialized frame must fail.
#[rstest]
fn test_messagepack_load_invalid_bytes() {
    let payload: &[u8] = &[0xFF, 0xFE, 0xFD, 0x00];
    let result = DataFrame::load_from_messagepack(payload);
    assert!(result.is_err());
}
/// Loading from an empty byte slice must return an error.
#[rstest]
fn test_messagepack_load_empty_bytes() {
    let payload: &[u8] = &[];
    let result = DataFrame::load_from_messagepack(payload);
    assert!(result.is_err());
}
/// Serializes a valid two-row frame, keeps only the first half of the bytes,
/// and expects loading that prefix to fail.
#[rstest]
fn test_messagepack_load_truncated_bytes() {
    let frame = ColumnFrame::from(vec![
        hashmap! {
            "a".into() => DataValue::String("hello world".into()),
            "b".into() => DataValue::Bool(true),
        },
        hashmap! {
            "a".into() => DataValue::String("test".into()),
            "b".into() => DataValue::Bool(false),
        },
    ]);
    let original = DataFrame::new(frame);
    let encoded = original
        .store_into_messagepack()
        .expect("failed to serialize");

    let cut_at = encoded.len() / 2;
    let result = DataFrame::load_from_messagepack(&encoded[..cut_at]);
    assert!(result.is_err());
}
/// Round-trips a frame containing a `DataValue::Vec` of strings and a
/// `DataValue::Bytes` cell and expects the restored frame to equal the input.
#[rstest]
fn test_messagepack_roundtrip_with_nested_vec_data() {
    let frame = ColumnFrame::from(vec![hashmap! {
        "vec_col".into() => DataValue::Vec(vec![
            DataValue::String("a".into()),
            DataValue::String("b".into()),
        ]),
        "bytes_col".into() => DataValue::Bytes(vec![0, 1, 255]),
    }]);
    let original = DataFrame::new(frame);
    let encoded = original
        .store_into_messagepack()
        .expect("failed to serialize");
    let decoded = DataFrame::load_from_messagepack(&encoded).expect("failed to deserialize");
    assert_eq!(original, decoded);
}
/// After a round-trip of a three-row, one-column frame, `len()`, `n_rows()`
/// and `n_columns()` must report 3, 3 and 1 respectively.
#[rstest]
fn test_messagepack_roundtrip_preserves_row_count() {
    let frame = ColumnFrame::from(vec![
        hashmap! { "a".into() => DataValue::String("x".into()) },
        hashmap! { "a".into() => DataValue::String("y".into()) },
        hashmap! { "a".into() => DataValue::String("z".into()) },
    ]);
    let original = DataFrame::new(frame);
    let encoded = original
        .store_into_messagepack()
        .expect("failed to serialize");
    let decoded = DataFrame::load_from_messagepack(&encoded).expect("failed to deserialize");

    assert_eq!(decoded.len(), 3);
    assert_eq!(decoded.n_rows(), 3);
    assert_eq!(decoded.n_columns(), 1);
}
/// Serializes a frame (with one metadata entry and one constant), loads it,
/// serializes the loaded frame again and loads that. The final frame must
/// equal the original and both byte buffers must be identical.
#[rstest]
fn test_messagepack_idempotent_double_roundtrip() {
    let frame = ColumnFrame::from(vec![
        hashmap! {
            "a".into() => DataValue::String("hello".into()),
            "b".into() => DataValue::Bool(true),
        },
        hashmap! {
            "a".into() => DataValue::String("world".into()),
            "b".into() => DataValue::Bool(false),
        },
    ]);
    let mut original = DataFrame::new(frame);
    original.add_metadata("meta".into(), DataValue::Bool(true));
    original.insert_constant("c".into(), DataValue::String("const".into()));

    // First pass.
    let first_bytes = original.store_into_messagepack().expect("first serialize");
    let first_pass = DataFrame::load_from_messagepack(&first_bytes).expect("first deserialize");

    // Second pass, starting from the already-restored frame.
    let second_bytes = first_pass
        .store_into_messagepack()
        .expect("second serialize");
    let second_pass =
        DataFrame::load_from_messagepack(&second_bytes).expect("second deserialize");

    assert_eq!(original, second_pass);
    assert_eq!(first_bytes, second_bytes);
}
/// Loading from the single byte `0x01` must return an error.
#[rstest]
fn test_messagepack_single_byte_payload() {
    let payload: &[u8] = &[0x01];
    let result = DataFrame::load_from_messagepack(payload);
    assert!(result.is_err());
}
/// `crate::hash_datavalue` is callable from here and yields the same hash for
/// two separately constructed `DataValue::I32(42)` values.
#[rstest]
fn test_hash_datavalue_public_api_accessible() {
    let first = crate::hash_datavalue(&DataValue::I32(42));
    let second = crate::hash_datavalue(&DataValue::I32(42));
    assert_eq!(first, second);
}
/// A one-element `Vec` and the same `Vec` with a trailing `Null` appended
/// must hash differently.
#[rstest]
fn test_hash_datavalue_vec_length_matters() {
    let one_item = DataValue::Vec(vec![DataValue::I32(1)]);
    let two_items = DataValue::Vec(vec![DataValue::I32(1), DataValue::Null]);

    let one_item_hash = crate::hash_datavalue(&one_item);
    let two_items_hash = crate::hash_datavalue(&two_items);
    assert_ne!(one_item_hash, two_items_hash);
}
/// Two single-entry maps that share the value `I32(1)` but use different keys
/// (`"a"` vs `"b"`) must hash differently.
#[rstest]
fn test_hash_datavalue_map_different_keys_same_values() {
    let keyed_a = std::collections::HashMap::from([("a".into(), DataValue::I32(1))]);
    let keyed_b = std::collections::HashMap::from([("b".into(), DataValue::I32(1))]);

    let hash_a = crate::hash_datavalue(&DataValue::Map(keyed_a));
    let hash_b = crate::hash_datavalue(&DataValue::Map(keyed_b));
    assert_ne!(hash_a, hash_b);
}
/// An empty `String` value and an empty `Bytes` value must not share a hash.
#[rstest]
fn test_hash_datavalue_empty_string_vs_empty_bytes() {
    let string_hash = crate::hash_datavalue(&DataValue::String("".into()));
    let bytes_hash = crate::hash_datavalue(&DataValue::Bytes(vec![]));
    assert_ne!(string_hash, bytes_hash);
}
/// An empty `Vec` value and an empty `Map` value must not share a hash.
#[rstest]
fn test_hash_datavalue_empty_vec_vs_empty_map() {
    let vec_hash = crate::hash_datavalue(&DataValue::Vec(vec![]));
    let map_hash = crate::hash_datavalue(&DataValue::Map(std::collections::HashMap::new()));
    assert_ne!(vec_hash, map_hash);
}
/// `I128` values `MAX`, `MIN`, `0` and `-1` must produce four distinct hashes.
#[rstest]
fn test_hash_datavalue_i128_boundary_values() {
    let samples = vec![
        DataValue::I128(i128::MAX),
        DataValue::I128(i128::MIN),
        DataValue::I128(0),
        DataValue::I128(-1),
    ];

    // A set collapses equal hashes, so its size counts the distinct ones.
    let mut distinct: std::collections::HashSet<u64> = std::collections::HashSet::new();
    for sample in &samples {
        distinct.insert(crate::hash_datavalue(sample));
    }
    assert_eq!(distinct.len(), 4);
}
/// `U128::MAX` must not hash like `I128(-1)`, and the `U128` values `MAX`,
/// `0` and `1` must produce three distinct hashes.
#[rstest]
fn test_hash_datavalue_u128_boundary_values() {
    let samples = vec![
        DataValue::U128(u128::MAX),
        DataValue::U128(0),
        DataValue::U128(1),
    ];
    let signed_minus_one = DataValue::I128(-1);

    let unsigned_max_hash = crate::hash_datavalue(&samples[0]);
    let signed_minus_one_hash = crate::hash_datavalue(&signed_minus_one);
    assert_ne!(unsigned_max_hash, signed_minus_one_hash);

    // A set collapses equal hashes, so its size counts the distinct ones.
    let mut distinct: std::collections::HashSet<u64> = std::collections::HashSet::new();
    for sample in &samples {
        distinct.insert(crate::hash_datavalue(sample));
    }
    assert_eq!(distinct.len(), 3);
}
/// Two `F64(NAN)` values must hash equally, while a subnormal value
/// (`MIN_POSITIVE / 2.0`) and `MIN_POSITIVE` itself must hash differently.
#[rstest]
fn test_hash_datavalue_f64_special_values() {
    let first_nan_hash = crate::hash_datavalue(&DataValue::F64(f64::NAN));
    let second_nan_hash = crate::hash_datavalue(&DataValue::F64(f64::NAN));
    assert_eq!(first_nan_hash, second_nan_hash);

    let subnormal_hash = crate::hash_datavalue(&DataValue::F64(f64::MIN_POSITIVE / 2.0));
    let normal_hash = crate::hash_datavalue(&DataValue::F64(f64::MIN_POSITIVE));
    assert_ne!(subnormal_hash, normal_hash);
}
/// `EnumNumber(42)` and `I32(42)` carry the same number but must not share a
/// hash.
#[rstest]
fn test_hash_datavalue_enum_number_vs_i32_same_value() {
    let enum_hash = crate::hash_datavalue(&DataValue::EnumNumber(42));
    let i32_hash = crate::hash_datavalue(&DataValue::I32(42));
    assert_ne!(enum_hash, i32_hash);
}
/// Reading an `i32` column as `f64` yields the same numbers as floats.
#[rstest]
fn get_single_column_typed_f64_from_i32() {
    let frame = crate::df! {
        "a" => [1i32, 2i32, 3i32]
    };
    let key: Key = "a".into();
    let expected = ndarray::arr1(&[1.0f64, 2.0, 3.0]);

    let actual = frame.get_single_column_typed::<f64>(&key).unwrap();
    assert_eq!(actual, expected);
}
/// Reading a string column as `String` yields the owned strings in row order.
#[rstest]
fn get_single_column_typed_string() {
    let frame = crate::df! {
        "name" => ["alice", "bob"]
    };
    let key: Key = "name".into();
    let expected = ndarray::arr1(&["alice".to_string(), "bob".to_string()]);

    let actual = frame.get_single_column_typed::<String>(&key).unwrap();
    assert_eq!(actual, expected);
}
/// Asking for a column under a key the frame does not contain yields `None`.
#[rstest]
fn get_single_column_typed_missing_key() {
    let frame = crate::df! {
        "a" => [1u64, 2u64]
    };
    let absent: Key = "z".into();
    let lookup = frame.get_single_column_typed::<u64>(&absent);
    assert!(lookup.is_none());
}
/// The typed accessor must agree element-by-element with the untyped accessor
/// followed by `u64::extract`, and both must expose every row of the column.
#[rstest]
fn get_single_column_typed_matches_untyped() {
    let df = crate::df! {
        "v" => [10u64, 20u64, 30u64]
    };
    let key: Key = "v".into();
    let typed = df.get_single_column_typed::<u64>(&key).unwrap();
    let untyped = df.get_single_column(&key).unwrap();

    // `zip` stops at the shorter side, so without these checks an empty or
    // truncated column on either side would make the loop pass vacuously.
    assert_eq!(typed.iter().count(), 3);
    assert_eq!(untyped.iter().count(), 3);

    for (t, u) in typed.iter().zip(untyped.iter()) {
        assert_eq!(*t, u64::extract(u));
    }
}
/// Reading an `i32` column of ones and zeros as `bool` yields `true` for 1
/// and `false` for 0.
#[rstest]
fn get_single_column_typed_bool_from_i32() {
    let frame = crate::df! {
        "flag" => [1i32, 0i32, 1i32, 0i32]
    };
    let key: Key = "flag".into();
    let expected = ndarray::arr1(&[true, false, true, false]);

    let actual = frame.get_single_column_typed::<bool>(&key).unwrap();
    assert_eq!(actual, expected);
}
/// Reading a `u32` column as `i64` yields the same numbers, widened.
#[rstest]
fn get_single_column_typed_i64_from_u32() {
    let frame = crate::df! {
        "x" => [10u32, 20u32, 30u32]
    };
    let key: Key = "x".into();
    let expected = ndarray::arr1(&[10i64, 20i64, 30i64]);

    let actual = frame.get_single_column_typed::<i64>(&key).unwrap();
    assert_eq!(actual, expected);
}
/// Reading an `f64` column as `i32` drops the fractional part: 1.9, 2.1 and
/// 3.7 are expected to come back as 1, 2 and 3.
#[rstest]
fn get_single_column_typed_f64_truncation_to_i32() {
    let frame = crate::df! {
        "v" => [1.9f64, 2.1f64, 3.7f64]
    };
    let key: Key = "v".into();
    let expected = ndarray::arr1(&[1i32, 2i32, 3i32]);

    let actual = frame.get_single_column_typed::<i32>(&key).unwrap();
    assert_eq!(actual, expected);
}
/// A one-row `u64` column read as `f64` has length 1 and holds `42.0`.
#[rstest]
fn get_single_column_typed_single_element() {
    let frame = crate::df! {
        "solo" => [42u64]
    };
    let key: Key = "solo".into();

    let column = frame.get_single_column_typed::<f64>(&key).unwrap();
    assert_eq!(column.len(), 1);
    assert_eq!(column[0], 42.0);
}
/// Selecting every column (`None`) of a 3x2 `i32` frame as `f64` yields a
/// 3-row, 2-column array with column `"a"` at index 0 and `"b"` at index 1.
#[rstest]
fn select_typed_all_columns() {
    let frame = crate::df! {
        "a" => [1i32, 2i32, 3i32],
        "b" => [4i32, 5i32, 6i32]
    };
    let selected = frame.select_typed::<f64>(None).unwrap();

    assert_eq!(selected.nrows(), 3);
    assert_eq!(selected.ncols(), 2);

    // Spot-check the four corners: ((row, column), expected value).
    for ((row, col), want) in [((0, 0), 1.0), ((0, 1), 4.0), ((2, 0), 3.0), ((2, 1), 6.0)] {
        assert_eq!(selected[[row, col]], want);
    }
}
/// Selecting only `"x"` and `"z"` from a three-column `u64` frame as `i64`
/// yields a 2x2 array whose columns follow the order of the requested keys.
#[rstest]
fn select_typed_specific_keys() {
    let frame = crate::df! {
        "x" => [10u64, 20u64],
        "y" => [30u64, 40u64],
        "z" => [50u64, 60u64]
    };
    let wanted: Vec<Key> = vec!["x".into(), "z".into()];
    let selected = frame.select_typed::<i64>(Some(&wanted)).unwrap();

    assert_eq!(selected.nrows(), 2);
    assert_eq!(selected.ncols(), 2);

    // ((row, column), expected value) for every cell of the result.
    for ((row, col), want) in [
        ((0, 0), 10i64),
        ((0, 1), 50i64),
        ((1, 0), 20i64),
        ((1, 1), 60i64),
    ] {
        assert_eq!(selected[[row, col]], want);
    }
}
/// Selecting a key the frame does not contain succeeds and returns an array
/// of shape `[0, 0]`.
#[rstest]
fn select_typed_nonexistent_key_gives_empty() {
    let frame = crate::df! {
        "a" => [1i32, 2i32]
    };
    let wanted: Vec<Key> = vec!["missing".into()];

    let selected = frame.select_typed::<f64>(Some(&wanted)).unwrap();
    assert_eq!(selected.shape(), &[0, 0]);
}
/// `select_typed::<f64>` must produce the same array as calling `select` and
/// mapping every cell through `f64::extract`.
#[rstest]
fn select_typed_matches_select_with_extract() {
    let frame = crate::df! {
        "a" => [1u64, 2u64, 3u64],
        "b" => [4u64, 5u64, 6u64]
    };
    let via_typed = frame.select_typed::<f64>(None).unwrap();

    let raw = frame.select(None).unwrap();
    let via_extract = raw.mapv(|cell| f64::extract(&cell));

    assert_eq!(via_typed, via_extract);
}
/// Selecting a string column as `String` returns the names in row order in
/// column 0 of the result.
#[rstest]
fn select_typed_string_values() {
    let frame = crate::df! {
        "name" => ["alice", "bob", "carol"]
    };
    let selected = frame.select_typed::<String>(None).unwrap();

    // (row, expected name)
    for (row, want) in [(0, "alice"), (1, "bob"), (2, "carol")] {
        assert_eq!(selected[[row, 0]], want);
    }
}
/// Selecting an `i32` column as `u64` returns the same (non-negative) numbers
/// as `u64` values.
#[rstest]
fn select_typed_cross_numeric_coercion() {
    let frame = crate::df! {
        "a" => [1i32, 2i32, 3i32]
    };
    let selected = frame.select_typed::<u64>(None).unwrap();

    // (row, expected value)
    for (row, want) in [(0, 1u64), (1, 2u64), (2, 3u64)] {
        assert_eq!(selected[[row, 0]], want);
    }
}
}