#![allow(clippy::box_collection, clippy::redundant_allocation, clippy::vec_box)]
use std::borrow::Cow;
use std::rc::Rc;
use std::sync::Arc;
use chrono::NaiveDate;
use df_derive::ToDataFrame;
use polars::prelude::*;
use crate::core::dataframe::{ToDataFrame, ToDataFrameVec};
#[derive(ToDataFrame, Clone)]
struct Bare {
bx_i64: Box<i64>,
arc_str: Arc<String>,
rc_date: Rc<NaiveDate>,
bx_date: Box<NaiveDate>,
cow_date: Cow<'static, NaiveDate>,
bx_dur: Box<chrono::Duration>,
bxbx_i32: Box<Box<i32>>,
}
#[derive(ToDataFrame, Clone)]
struct Composed {
opt_bx_i32: Option<Box<i32>>,
bx_opt_bool: Box<Option<bool>>,
vec_arc_string: Vec<Arc<String>>,
bx_vec_f64: Box<Vec<f64>>,
}
#[derive(ToDataFrame, Clone)]
struct BoundaryPositions {
opt_box_vec: Option<Box<Vec<i32>>>,
opt_box_opt: Option<Box<Option<i32>>>,
vec_box_opt: Vec<Box<Option<i32>>>,
vec_box_vec: Vec<Box<Vec<i32>>>,
vec_opt_arc_vec: Vec<Option<Arc<Vec<i32>>>>,
#[df_derive(as_binary)]
opt_box_blob: Option<Box<Vec<u8>>>,
}
#[derive(ToDataFrame, Clone)]
struct Regression {
#[df_derive(as_binary)]
bx_blob: Box<Vec<u8>>,
arc_date: Arc<NaiveDate>,
bx_chrono_dur: Box<chrono::Duration>,
}
fn schema_dtype(schema: &[(String, DataType)], col: &str) -> DataType {
schema
.iter()
.find(|(n, _)| n == col)
.map(|(_, dt)| dt.clone())
.unwrap_or_else(|| panic!("column {col} missing"))
}
fn assert_list_i32(df: &DataFrame, col: &str, row: usize, expected: &[Option<i32>]) {
let AnyValue::List(inner) = df.column(col).unwrap().get(row).unwrap() else {
panic!("expected List for {col}[{row}]");
};
let actual: Vec<Option<i32>> = inner.i32().unwrap().into_iter().collect();
assert_eq!(actual, expected, "{col}[{row}]");
}
fn assert_nested_list_i32(
df: &DataFrame,
col: &str,
row: usize,
expected: &[Option<Vec<Option<i32>>>],
) {
let AnyValue::List(outer) = df.column(col).unwrap().get(row).unwrap() else {
panic!("expected outer List for {col}[{row}]");
};
let actual: Vec<Option<Vec<Option<i32>>>> = outer
.iter()
.map(|value| match value {
AnyValue::List(inner) => Some(inner.i32().unwrap().into_iter().collect()),
AnyValue::Null => None,
other => panic!("unexpected nested list value for {col}[{row}]: {other:?}"),
})
.collect();
assert_eq!(actual, expected, "{col}[{row}]");
}
#[test]
fn runtime_semantics() {
println!("--- Smart-pointer transparency ---");
let bare_schema = Bare::schema().unwrap();
assert_eq!(schema_dtype(&bare_schema, "bx_i64"), DataType::Int64);
assert_eq!(schema_dtype(&bare_schema, "arc_str"), DataType::String);
assert_eq!(schema_dtype(&bare_schema, "rc_date"), DataType::Date);
assert_eq!(schema_dtype(&bare_schema, "bx_date"), DataType::Date);
assert_eq!(schema_dtype(&bare_schema, "cow_date"), DataType::Date);
assert_eq!(
schema_dtype(&bare_schema, "bx_dur"),
DataType::Duration(TimeUnit::Nanoseconds)
);
assert_eq!(schema_dtype(&bare_schema, "bxbx_i32"), DataType::Int32);
let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
let leap = NaiveDate::from_ymd_opt(2000, 2, 29).unwrap();
let bare0 = Bare {
bx_i64: Box::new(42),
arc_str: Arc::new("hello".to_string()),
rc_date: Rc::new(epoch),
bx_date: Box::new(leap),
cow_date: Cow::Owned(leap),
bx_dur: Box::new(chrono::Duration::nanoseconds(1_500)),
bxbx_i32: Box::new(Box::new(-7)),
};
let bare1 = bare0.clone();
let bare_df = vec![bare0, bare1].as_slice().to_dataframe().unwrap();
assert_eq!(bare_df.shape(), (2, 7));
assert!(matches!(
bare_df.column("bx_i64").unwrap().get(0).unwrap(),
AnyValue::Int64(42)
));
assert!(matches!(
bare_df.column("bxbx_i32").unwrap().get(0).unwrap(),
AnyValue::Int32(-7)
));
match bare_df.column("arc_str").unwrap().get(0).unwrap() {
AnyValue::String(s) => assert_eq!(s, "hello"),
AnyValue::StringOwned(ref s) => assert_eq!(s.as_str(), "hello"),
other => panic!("unexpected arc_str AnyValue {other:?}"),
}
match bare_df.column("rc_date").unwrap().get(0).unwrap() {
AnyValue::Date(d) => assert_eq!(d, 0),
other => panic!("unexpected rc_date AnyValue {other:?}"),
}
match bare_df.column("bx_date").unwrap().get(0).unwrap() {
AnyValue::Date(d) => {
assert_eq!(d, leap.signed_duration_since(epoch).num_days() as i32)
}
other => panic!("unexpected bx_date AnyValue {other:?}"),
}
match bare_df.column("bx_dur").unwrap().get(0).unwrap() {
AnyValue::Duration(v, u) => {
assert_eq!(u, TimeUnit::Nanoseconds);
assert_eq!(v, 1_500);
}
other => panic!("unexpected bx_dur AnyValue {other:?}"),
}
let composed_schema = Composed::schema().unwrap();
assert_eq!(
schema_dtype(&composed_schema, "opt_bx_i32"),
DataType::Int32
);
assert_eq!(
schema_dtype(&composed_schema, "bx_opt_bool"),
DataType::Boolean
);
assert_eq!(
schema_dtype(&composed_schema, "vec_arc_string"),
DataType::List(Box::new(DataType::String))
);
assert_eq!(
schema_dtype(&composed_schema, "bx_vec_f64"),
DataType::List(Box::new(DataType::Float64))
);
let row0 = Composed {
opt_bx_i32: Some(Box::new(123)),
bx_opt_bool: Box::new(Some(true)),
vec_arc_string: vec![Arc::new("a".to_string()), Arc::new("b".to_string())],
bx_vec_f64: Box::new(vec![1.5, 2.5, 3.5]),
};
let row1 = Composed {
opt_bx_i32: None,
bx_opt_bool: Box::new(None),
vec_arc_string: vec![],
bx_vec_f64: Box::new(vec![]),
};
let composed_df = vec![row0, row1].as_slice().to_dataframe().unwrap();
assert_eq!(composed_df.shape(), (2, 4));
assert!(matches!(
composed_df.column("opt_bx_i32").unwrap().get(0).unwrap(),
AnyValue::Int32(123)
));
assert!(matches!(
composed_df.column("opt_bx_i32").unwrap().get(1).unwrap(),
AnyValue::Null
));
assert!(matches!(
composed_df.column("bx_opt_bool").unwrap().get(0).unwrap(),
AnyValue::Boolean(true)
));
assert!(matches!(
composed_df.column("bx_opt_bool").unwrap().get(1).unwrap(),
AnyValue::Null
));
let v = composed_df
.column("vec_arc_string")
.unwrap()
.get(0)
.unwrap();
let AnyValue::List(inner) = v else {
panic!("expected list")
};
let strs: Vec<String> = inner
.iter()
.map(|av| match av {
AnyValue::String(s) => s.to_string(),
AnyValue::StringOwned(s) => s.to_string(),
other => panic!("unexpected list elem {other:?}"),
})
.collect();
assert_eq!(strs, vec!["a".to_string(), "b".to_string()]);
let v = composed_df.column("bx_vec_f64").unwrap().get(0).unwrap();
let AnyValue::List(inner) = v else {
panic!("expected list")
};
let nums: Vec<f64> = inner
.iter()
.map(|av| match av {
AnyValue::Float64(f) => f,
other => panic!("unexpected list elem {other:?}"),
})
.collect();
assert_eq!(nums, vec![1.5, 2.5, 3.5]);
let boundary_schema = BoundaryPositions::schema().unwrap();
assert_eq!(
schema_dtype(&boundary_schema, "opt_box_vec"),
DataType::List(Box::new(DataType::Int32))
);
assert_eq!(
schema_dtype(&boundary_schema, "opt_box_opt"),
DataType::Int32
);
assert_eq!(
schema_dtype(&boundary_schema, "vec_box_opt"),
DataType::List(Box::new(DataType::Int32))
);
assert_eq!(
schema_dtype(&boundary_schema, "vec_box_vec"),
DataType::List(Box::new(DataType::List(Box::new(DataType::Int32))))
);
assert_eq!(
schema_dtype(&boundary_schema, "vec_opt_arc_vec"),
DataType::List(Box::new(DataType::List(Box::new(DataType::Int32))))
);
assert_eq!(
schema_dtype(&boundary_schema, "opt_box_blob"),
DataType::Binary
);
let boundary_rows = vec![
BoundaryPositions {
opt_box_vec: Some(Box::new(vec![1, 2])),
opt_box_opt: Some(Box::new(Some(7))),
vec_box_opt: vec![Box::new(Some(10)), Box::new(None), Box::new(Some(30))],
vec_box_vec: vec![Box::new(vec![1, 2]), Box::new(vec![]), Box::new(vec![3])],
vec_opt_arc_vec: vec![Some(Arc::new(vec![5, 6])), None, Some(Arc::new(vec![]))],
opt_box_blob: Some(Box::new(b"abc".to_vec())),
},
BoundaryPositions {
opt_box_vec: None,
opt_box_opt: Some(Box::new(None)),
vec_box_opt: vec![],
vec_box_vec: vec![],
vec_opt_arc_vec: vec![],
opt_box_blob: None,
},
BoundaryPositions {
opt_box_vec: Some(Box::new(vec![])),
opt_box_opt: None,
vec_box_opt: vec![Box::new(None)],
vec_box_vec: vec![Box::new(vec![9])],
vec_opt_arc_vec: vec![None],
opt_box_blob: Some(Box::new(vec![])),
},
];
let boundary_df = boundary_rows.as_slice().to_dataframe().unwrap();
assert_eq!(boundary_df.shape(), (3, 6));
assert_list_i32(&boundary_df, "opt_box_vec", 0, &[Some(1), Some(2)]);
assert!(matches!(
boundary_df.column("opt_box_vec").unwrap().get(1).unwrap(),
AnyValue::Null
));
assert_list_i32(&boundary_df, "opt_box_vec", 2, &[]);
assert!(matches!(
boundary_df.column("opt_box_opt").unwrap().get(0).unwrap(),
AnyValue::Int32(7)
));
assert!(matches!(
boundary_df.column("opt_box_opt").unwrap().get(1).unwrap(),
AnyValue::Null
));
assert!(matches!(
boundary_df.column("opt_box_opt").unwrap().get(2).unwrap(),
AnyValue::Null
));
assert_list_i32(&boundary_df, "vec_box_opt", 0, &[Some(10), None, Some(30)]);
assert_list_i32(&boundary_df, "vec_box_opt", 1, &[]);
assert_list_i32(&boundary_df, "vec_box_opt", 2, &[None]);
assert_nested_list_i32(
&boundary_df,
"vec_box_vec",
0,
&[
Some(vec![Some(1), Some(2)]),
Some(vec![]),
Some(vec![Some(3)]),
],
);
assert_nested_list_i32(&boundary_df, "vec_box_vec", 1, &[]);
assert_nested_list_i32(&boundary_df, "vec_box_vec", 2, &[Some(vec![Some(9)])]);
assert_nested_list_i32(
&boundary_df,
"vec_opt_arc_vec",
0,
&[Some(vec![Some(5), Some(6)]), None, Some(vec![])],
);
assert_nested_list_i32(&boundary_df, "vec_opt_arc_vec", 1, &[]);
assert_nested_list_i32(&boundary_df, "vec_opt_arc_vec", 2, &[None]);
match boundary_df.column("opt_box_blob").unwrap().get(0).unwrap() {
AnyValue::Binary(bytes) => assert_eq!(bytes, b"abc"),
AnyValue::BinaryOwned(ref bytes) => assert_eq!(bytes.as_slice(), b"abc"),
other => panic!("unexpected opt_box_blob[0] {other:?}"),
}
assert!(matches!(
boundary_df.column("opt_box_blob").unwrap().get(1).unwrap(),
AnyValue::Null
));
match boundary_df.column("opt_box_blob").unwrap().get(2).unwrap() {
AnyValue::Binary(bytes) => assert_eq!(bytes, b""),
AnyValue::BinaryOwned(ref bytes) => assert_eq!(bytes.as_slice(), b""),
other => panic!("unexpected opt_box_blob[2] {other:?}"),
}
let regression_schema = Regression::schema().unwrap();
assert_eq!(
schema_dtype(®ression_schema, "bx_blob"),
DataType::Binary
);
assert_eq!(schema_dtype(®ression_schema, "arc_date"), DataType::Date);
assert_eq!(
schema_dtype(®ression_schema, "bx_chrono_dur"),
DataType::Duration(TimeUnit::Nanoseconds)
);
let reg0 = Regression {
bx_blob: Box::new(b"binary-blob".to_vec()),
arc_date: Arc::new(epoch),
bx_chrono_dur: Box::new(chrono::Duration::nanoseconds(123)),
};
let reg_df = reg0.to_dataframe().unwrap();
assert_eq!(reg_df.shape(), (1, 3));
match reg_df.column("bx_blob").unwrap().get(0).unwrap() {
AnyValue::Binary(b) => assert_eq!(b, b"binary-blob"),
AnyValue::BinaryOwned(ref b) => assert_eq!(b.as_slice(), b"binary-blob"),
other => panic!("unexpected bx_blob {other:?}"),
}
match reg_df.column("arc_date").unwrap().get(0).unwrap() {
AnyValue::Date(d) => assert_eq!(d, 0),
other => panic!("unexpected arc_date {other:?}"),
}
let empty = Bare::empty_dataframe().unwrap();
assert_eq!(empty.shape(), (0, 7));
let composed_empty = Composed::empty_dataframe().unwrap();
assert_eq!(composed_empty.shape(), (0, 4));
println!("Smart-pointer transparency test passed.");
}