use alloc::{
fmt,
string::{String, ToString},
sync::Arc,
vec,
vec::Vec,
};
use core::ops::{Add, Sub};
use gguppy_core::data::{ChunkedSlices, DataFrameAdapter, SeriesAdapter, SeriesAdapterError};
/// Errors reported by the basic in-memory data frame backend.
///
/// Both variants carry only a `String`, so the type is cheap to clone and
/// supports total equality (`Eq`) in addition to `PartialEq`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BasicdfError {
    /// No column with the contained name was found.
    ColumnNotFound(String),
    /// The requested operation is not supported; the payload describes it.
    NotYetImplemented(String),
}
impl core::fmt::Display for BasicdfError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
BasicdfError::NotYetImplemented(desc) => {
write!(f, "Not yet implemented: {desc}")
}
BasicdfError::ColumnNotFound(desc) => write!(f, "Column not found: {desc}",),
}
}
}
/// A homogeneously typed vector of values, with one variant per supported
/// element type.
///
/// Each variant wraps a `Vec` of the element type its name refers to.
#[derive(Debug, PartialEq)]
pub enum BasicTypedArray {
/// Values of type `usize`.
Usize(Vec<usize>),
/// Values of type `u8`.
U8(Vec<u8>),
/// Values of type `u16`.
U16(Vec<u16>),
/// Values of type `u32`.
U32(Vec<u32>),
/// Values of type `u64`.
U64(Vec<u64>),
/// Values of type `isize`.
Isize(Vec<isize>),
/// Values of type `i8`.
I8(Vec<i8>),
/// Values of type `i16`.
I16(Vec<i16>),
/// Values of type `i32`.
I32(Vec<i32>),
/// Values of type `i64`.
I64(Vec<i64>),
/// Values of type `f32`.
F32(Vec<f32>),
/// Values of type `f64`.
F64(Vec<f64>),
/// Values of type `bool`.
Bool(Vec<bool>),
/// Owned string values.
String(Vec<String>),
/// Values of type `char`.
Char(Vec<char>),
}
// Dispatches over every `BasicTypedArray` variant.
//
// `$typed_array_expr` is matched against all variants; for the variant that
// matches, the macro named by `$handler` is invoked as
// `$handler!(VariantName, v)`, where `v` is the identifier bound to the
// variant's payload. The handler macro must therefore accept an identifier
// (the variant name) followed by an identifier (the payload binding), and
// every expansion must produce the same type, because the expansions are the
// arms of a single `match`.
#[rustfmt::skip]
macro_rules! apply_all_basic_typed_array {
($typed_array_expr:expr, $handler:ident) => {
match $typed_array_expr {
$crate::basicdf::BasicTypedArray::Usize(v) => $handler!(Usize, v),
$crate::basicdf::BasicTypedArray::U8(v) => $handler!(U8, v),
$crate::basicdf::BasicTypedArray::U16(v) => $handler!(U16, v),
$crate::basicdf::BasicTypedArray::U32(v) => $handler!(U32, v),
$crate::basicdf::BasicTypedArray::U64(v) => $handler!(U64, v),
$crate::basicdf::BasicTypedArray::Isize(v) => $handler!(Isize, v),
$crate::basicdf::BasicTypedArray::I8(v) => $handler!(I8, v),
$crate::basicdf::BasicTypedArray::I16(v) => $handler!(I16, v),
$crate::basicdf::BasicTypedArray::I32(v) => $handler!(I32, v),
$crate::basicdf::BasicTypedArray::I64(v) => $handler!(I64, v),
$crate::basicdf::BasicTypedArray::F32(v) => $handler!(F32, v),
$crate::basicdf::BasicTypedArray::F64(v) => $handler!(F64, v),
$crate::basicdf::BasicTypedArray::Bool(v) => $handler!(Bool, v),
$crate::basicdf::BasicTypedArray::String(v) => $handler!(String, v),
$crate::basicdf::BasicTypedArray::Char(v) => $handler!(Char, v),
}
};
}
#[rustfmt::skip]
impl BasicTypedArray {
#[must_use]
pub fn len(&self) -> usize {
macro_rules! variant_len {($variant:ident, $v:ident) => {$v.len()};}
apply_all_basic_typed_array!(self, variant_len)
}
#[must_use]
pub fn is_empty(&self) -> bool {
macro_rules! variant_is_empty {($variant:ident, $v:ident) => {$v.is_empty()};}
apply_all_basic_typed_array!(self, variant_is_empty)
}
}
/// A typed array of values paired with a name.
#[derive(Debug, PartialEq)]
pub struct BasicNamedArray {
/// The stored values.
pub values: BasicTypedArray,
/// The name attached to the values.
pub name: String,
}
impl fmt::Display for BasicNamedArray {
    /// Formats the array as its name, a colon and a space, then the `Debug`
    /// rendering of the values (for example `test: Usize([1, 2, 3])`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { values, name } = self;
        write!(f, "{name}: {values:?}")
    }
}
/// Constructor trait: builds `Self` from a name and a vector of `T` values.
///
/// Being generic over `T`, it lets one `new` entry point accept vectors of
/// different element types.
pub trait FromNamedArray<T> {
/// Creates a value named `name` from the elements of `v`.
fn new(name: &str, v: Vec<T>) -> Self;
}
// Generates the `FromNamedArray<$type>` constructors for both
// `BasicNamedArray` and `BasicSeries`.
//
// `$type` is the element type of the incoming vector and `$dtype` is the
// `BasicTypedArray` variant that stores vectors of that type.
macro_rules! impl_into_basic_named_array {
    ($type:ty, $dtype:ident) => {
        impl crate::basicdf::FromNamedArray<$type> for crate::basicdf::BasicNamedArray {
            fn new(name: &str, v: Vec<$type>) -> Self {
                let values = crate::basicdf::BasicTypedArray::$dtype(v);
                let name = name.to_string();
                Self { values, name }
            }
        }
        impl crate::basicdf::FromNamedArray<$type> for crate::basicdf::BasicSeries {
            fn new(name: &str, v: Vec<$type>) -> Self {
                // Build the named array first, then wrap it in the shared pointer.
                let named = <crate::basicdf::BasicNamedArray as crate::basicdf::FromNamedArray<
                    $type,
                >>::new(name, v);
                Self(alloc::sync::Arc::new(named))
            }
        }
    };
}
// Constructor impls for every element type whose vector is stored as-is in
// the `BasicTypedArray` variant of the same name. The `&str` and `String`
// element types are implemented by hand instead of through this macro.
impl_into_basic_named_array!(usize, Usize);
impl_into_basic_named_array!(u8, U8);
impl_into_basic_named_array!(u16, U16);
impl_into_basic_named_array!(u32, U32);
impl_into_basic_named_array!(u64, U64);
impl_into_basic_named_array!(isize, Isize);
impl_into_basic_named_array!(i8, I8);
impl_into_basic_named_array!(i16, I16);
impl_into_basic_named_array!(i32, I32);
impl_into_basic_named_array!(i64, I64);
impl_into_basic_named_array!(f32, F32);
impl_into_basic_named_array!(f64, F64);
impl_into_basic_named_array!(bool, Bool);
impl_into_basic_named_array!(char, Char);
impl FromNamedArray<&str> for BasicNamedArray {
    /// Creates a named array from string slices, copying each slice into an
    /// owned `String` stored in the `BasicTypedArray::String` variant.
    fn new(name: &str, v: Vec<&str>) -> Self {
        // Consume the vector so each item is a `&str` (not `&&str`) and use
        // the direct `&str -> String` conversion instead of going through
        // the `Display`-based `to_string` on a double reference.
        let owned: Vec<String> = v.into_iter().map(String::from).collect();
        Self {
            values: BasicTypedArray::String(owned),
            name: name.to_string(),
        }
    }
}
impl FromNamedArray<&str> for BasicSeries {
fn new(name: &str, v: Vec<&str>) -> Self {
Self(Arc::new(BasicNamedArray::new(name, v)))
}
}
impl FromNamedArray<String> for BasicNamedArray {
    /// Creates a named array that takes ownership of the given strings,
    /// storing them in the `BasicTypedArray::String` variant.
    fn new(name: &str, v: Vec<String>) -> Self {
        let values = BasicTypedArray::String(v);
        let name = name.to_string();
        Self { values, name }
    }
}
impl FromNamedArray<String> for BasicSeries {
fn new(name: &str, v: Vec<String>) -> Self {
Self(Arc::new(BasicNamedArray::new(name, v)))
}
}
/// A named column: a reference-counted handle to a `BasicNamedArray`.
///
/// Cloning a series clones the `Arc`, so the clone shares the same
/// underlying array rather than copying its values.
#[derive(Clone, Debug, PartialEq)]
pub struct BasicSeries(pub Arc<BasicNamedArray>);
impl From<BasicNamedArray> for BasicSeries {
fn from(array: BasicNamedArray) -> Self {
BasicSeries(Arc::new(array))
}
}
impl core::ops::Deref for BasicSeries {
type Target = BasicNamedArray;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl SeriesAdapter<'_> for BasicSeries {
type LibError = BasicdfError;
fn name(&self) -> &str {
&self.0.name
}
fn len(&self) -> usize {
self.0.values.len()
}
fn is_empty(&self) -> bool {
self.0.values.is_empty()
}
fn as_chunked_slices(&self) -> Result<ChunkedSlices<'_>, SeriesAdapterError> {
macro_rules! variant_as_slice {
($variant:ident, $v:ident) => {
gguppy_core::data::ChunkedSlices::$variant(vec![$v.as_slice()])
};
}
Ok(apply_all_basic_typed_array!(
&self.0.values,
variant_as_slice
))
}
}
impl Add for BasicSeries {
    type Output = Result<Self, BasicdfError>;

    /// Adds two series element by element.
    ///
    /// Only `I32 + I32` and `F64 + F64` are supported. The result is a new
    /// series named `"unnamed"`.
    ///
    /// # Errors
    ///
    /// Returns `BasicdfError::NotYetImplemented` when the operand types are
    /// not one of the supported pairs, or when the two series have different
    /// lengths. Rejecting mismatched lengths avoids silently dropping the
    /// trailing elements of the longer operand, which is what `zip` alone
    /// would do.
    ///
    /// Integer addition uses the plain `+` operator, so `i32` overflow
    /// follows the build's overflow-check setting.
    fn add(self, rhs: Self) -> Self::Output {
        let same_len = self.0.values.len() == rhs.0.values.len();
        match (&self.0.values, &rhs.0.values) {
            (BasicTypedArray::I32(v1), BasicTypedArray::I32(v2)) if same_len => {
                let result: Vec<i32> = v1.iter().zip(v2.iter()).map(|(a, b)| a + b).collect();
                Ok(BasicSeries(Arc::new(BasicNamedArray::new(
                    "unnamed", result,
                ))))
            }
            (BasicTypedArray::F64(v1), BasicTypedArray::F64(v2)) if same_len => {
                let result: Vec<f64> = v1.iter().zip(v2.iter()).map(|(a, b)| a + b).collect();
                Ok(BasicSeries(Arc::new(BasicNamedArray::new(
                    "unnamed", result,
                ))))
            }
            // Supported types, but the guards above failed: lengths differ.
            (BasicTypedArray::I32(_), BasicTypedArray::I32(_))
            | (BasicTypedArray::F64(_), BasicTypedArray::F64(_)) => {
                Err(BasicdfError::NotYetImplemented(
                    "Addition of series with different lengths is not implemented".to_string(),
                ))
            }
            _ => Err(BasicdfError::NotYetImplemented(
                "Addition for this type is not implemented".to_string(),
            )),
        }
    }
}
impl Sub for BasicSeries {
    type Output = Result<Self, BasicdfError>;

    /// Subtracts `rhs` from `self` element by element.
    ///
    /// Only `I32 - I32` and `F64 - F64` are supported. The result is a new
    /// series named `"unnamed"`.
    ///
    /// # Errors
    ///
    /// Returns `BasicdfError::NotYetImplemented` when the operand types are
    /// not one of the supported pairs, or when the two series have different
    /// lengths. Rejecting mismatched lengths avoids silently dropping the
    /// trailing elements of the longer operand, which is what `zip` alone
    /// would do.
    ///
    /// Integer subtraction uses the plain `-` operator, so `i32` overflow
    /// follows the build's overflow-check setting.
    fn sub(self, rhs: Self) -> Self::Output {
        let same_len = self.0.values.len() == rhs.0.values.len();
        match (&self.0.values, &rhs.0.values) {
            (BasicTypedArray::I32(v1), BasicTypedArray::I32(v2)) if same_len => {
                let result: Vec<i32> = v1.iter().zip(v2.iter()).map(|(a, b)| a - b).collect();
                Ok(BasicSeries(Arc::new(BasicNamedArray::new(
                    "unnamed", result,
                ))))
            }
            (BasicTypedArray::F64(v1), BasicTypedArray::F64(v2)) if same_len => {
                let result: Vec<f64> = v1.iter().zip(v2.iter()).map(|(a, b)| a - b).collect();
                Ok(BasicSeries(Arc::new(BasicNamedArray::new(
                    "unnamed", result,
                ))))
            }
            // Supported types, but the guards above failed: lengths differ.
            (BasicTypedArray::I32(_), BasicTypedArray::I32(_))
            | (BasicTypedArray::F64(_), BasicTypedArray::F64(_)) => {
                Err(BasicdfError::NotYetImplemented(
                    "Subtraction of series with different lengths is not implemented".to_string(),
                ))
            }
            _ => Err(BasicdfError::NotYetImplemented(
                "Subtraction for this type is not implemented".to_string(),
            )),
        }
    }
}
/// A data frame represented as an ordered list of series (its columns).
#[derive(Clone, Debug, PartialEq)]
pub struct BasicDataFrame {
/// The columns, in the order they were supplied.
pub columns: Vec<BasicSeries>,
}
impl BasicDataFrame {
#[must_use]
pub fn new(columns: Vec<BasicSeries>) -> Self {
Self { columns }
}
}
impl DataFrameAdapter<'_> for BasicDataFrame {
    type DataFrame = BasicDataFrame;
    type Series = BasicSeries;
    type LibError = BasicdfError;

    /// Returns the column names in column order.
    fn column_names(&self) -> Vec<String> {
        let mut names = Vec::with_capacity(self.columns.len());
        for series in &self.columns {
            names.push(series.0.name.clone());
        }
        names
    }

    /// Returns a clone of the first column named `name`, or
    /// `BasicdfError::ColumnNotFound` carrying `name` when there is none.
    fn col(&self, name: &str) -> Result<Self::Series, Self::LibError> {
        match self.columns.iter().find(|series| series.0.name == name) {
            Some(series) => Ok(series.clone()),
            None => Err(BasicdfError::ColumnNotFound(name.to_string())),
        }
    }

    /// Builds a new data frame from the named columns, in the order of
    /// `names`. Fails with `BasicdfError::ColumnNotFound` for the first name
    /// that has no matching column.
    fn select(&self, names: Vec<&str>) -> Result<Self::DataFrame, Self::LibError> {
        names
            .into_iter()
            .map(|name| self.col(name))
            .collect::<Result<Vec<_>, _>>()
            .map(BasicDataFrame::new)
    }

    /// Returns `(rows, columns)`, where the row count is the length of the
    /// longest column (0 when there are no columns).
    fn shape(&self) -> (usize, usize) {
        let nrows = self
            .columns
            .iter()
            .fold(0_usize, |longest, series| longest.max(series.0.values.len()));
        (nrows, self.columns.len())
    }
}
impl fmt::Display for BasicDataFrame {
    /// Writes the first rows of the frame as tab-separated text.
    ///
    /// At most 10 rows are printed, one per line, each terminated by a
    /// newline. Cells are separated by a tab and rendered with their
    /// `Display` implementation. A column shorter than the current row
    /// contributes an empty cell. No header line is written, and a frame
    /// without columns produces no output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Upper bound on the number of rows rendered.
        const MAX_DISPLAY_ROWS: usize = 10;

        // The longest column decides how many rows exist; with no columns
        // this is 0 and the loop below does not run.
        let row_count = self
            .columns
            .iter()
            .map(|col| col.0.values.len())
            .max()
            .unwrap_or(0);

        for row in 0..row_count.min(MAX_DISPLAY_ROWS) {
            for (i, series) in self.columns.iter().enumerate() {
                if i > 0 {
                    f.write_str("\t")?;
                }
                // Write the cell straight into the formatter instead of
                // building (or cloning) an intermediate `String` per cell.
                macro_rules! write_cell {
                    ($variant:ident, $v:ident) => {
                        if let Some(cell) = $v.get(row) {
                            write!(f, "{}", cell)?;
                        }
                    };
                }
                apply_all_basic_typed_array!(&series.0.values, write_cell);
            }
            writeln!(f)?;
        }
        Ok(())
    }
}
/// Builds a `BasicDataFrame` from `name => values` pairs.
///
/// Each pair becomes one `BasicSeries`, created through the
/// `FromNamedArray` constructor that matches the type of `values`; columns
/// keep the order in which they are written. At least one pair is required
/// and a trailing comma is accepted.
///
/// The constructor is called through a fully qualified trait path, so the
/// call site does not need to import `FromNamedArray` itself.
#[macro_export]
macro_rules! basic_df {
    ($($col_name:expr => $slice:expr), + $(,)?) => {
        {
            $crate::basicdf::BasicDataFrame::new(vec![
                $(
                    <$crate::basicdf::BasicSeries as $crate::basicdf::FromNamedArray<_>>::new(
                        $col_name,
                        $slice,
                    ),
                )+
            ])
        }
    }
}
// Unit tests for the basic data frame types, their constructors, trait
// implementations and the `basic_df!` macro.
#[cfg(test)]
mod test {
use super::*;
// Both error variants render as their fixed prefix followed by the description.
#[test]
fn basicdf_error_display() {
let error = BasicdfError::ColumnNotFound("test_column".to_string());
assert_eq!(error.to_string(), "Column not found: test_column");
let error = BasicdfError::NotYetImplemented("test_functionality".to_string());
assert_eq!(error.to_string(), "Not yet implemented: test_functionality");
}
// `len` and `is_empty` on a non-empty typed array.
#[test]
fn basic_typed_array_len() {
let array = BasicTypedArray::Usize(vec![1, 2, 3]);
assert_eq!(array.len(), 3);
assert!(!array.is_empty());
}
// `is_empty` on an empty typed array.
#[test]
fn basic_typed_array_empty() {
assert!(BasicTypedArray::Usize(vec![]).is_empty());
}
// `BasicNamedArray::new` for `usize`, `String` and `&str` inputs; both string
// forms end up in the `String` variant.
#[test]
fn basic_named_array_new() {
let mut named_array: BasicNamedArray;
named_array = BasicNamedArray::new("test", vec![1usize, 2, 3]);
assert_eq!(named_array.name, "test");
assert_eq!(named_array.values, BasicTypedArray::Usize(vec![1, 2, 3]));
named_array = BasicNamedArray::new(
"test",
vec!["a".to_string(), "b".to_string(), "c".to_string()],
);
assert_eq!(
named_array.values,
BasicTypedArray::String(vec!["a".to_string(), "b".to_string(), "c".to_string()])
);
named_array = BasicNamedArray::new("test", vec!["a", "b", "c"]);
assert_eq!(
named_array.values,
BasicTypedArray::String(vec!["a".to_string(), "b".to_string(), "c".to_string()])
);
}
// `Display` output is the name followed by the `Debug` form of the values.
#[test]
fn basic_named_array_display() {
let named_array = BasicNamedArray::new("test", vec![1usize, 2, 3]);
assert_eq!(named_array.to_string(), "test: Usize([1, 2, 3])");
}
// `BasicSeries::new` for `usize`, `String` and `&str` inputs.
#[test]
fn basic_series_new() {
let mut series: BasicSeries;
series = BasicSeries::new("test", vec![1usize, 2, 3]);
assert_eq!(series.name(), "test");
assert_eq!(series.len(), 3);
assert!(!series.is_empty());
series = BasicSeries::new(
"test",
vec!["a".to_string(), "b".to_string(), "c".to_string()],
);
assert_eq!(series.name(), "test");
assert_eq!(series.len(), 3);
assert!(!series.is_empty());
series = BasicSeries::new("test", vec!["a", "b", "c"]);
assert_eq!(series.name(), "test");
assert_eq!(series.len(), 3);
assert!(!series.is_empty());
}
// An empty series reports `is_empty`; the element type is named explicitly
// because it cannot be inferred from an empty `vec![]`.
#[test]
fn basic_series_empty() {
assert!(<BasicSeries as FromNamedArray::<usize>>::new("", vec![]).is_empty());
}
// Conversion from `BasicNamedArray` into `BasicSeries` keeps name and values.
#[test]
fn basic_series_from_basic_named_array() {
let named_array = BasicNamedArray::new("test", vec![1usize, 2, 3]);
let series: BasicSeries = named_array.into();
assert_eq!(series.name(), "test");
assert_eq!(series.len(), 3);
assert_eq!(series.0.values, BasicTypedArray::Usize(vec![1, 2, 3]));
}
// `SeriesAdapter::name` returns the series name.
#[test]
fn series_adapter_name() {
let series = BasicSeries::new("test", vec![1usize, 2, 3]);
assert_eq!(series.name(), "test");
}
// `SeriesAdapter::len` / `is_empty`, called through the trait explicitly.
#[test]
fn series_adapter_len() {
let series = BasicSeries::new("test", vec![1usize, 2, 3]);
assert_eq!(<BasicSeries as SeriesAdapter>::len(&series), 3);
assert!(!<BasicSeries as SeriesAdapter>::is_empty(&series));
}
// Same as above for an empty series.
#[test]
fn series_adapter_empty() {
let series = BasicSeries::new("test", vec![0u8; 0]);
assert_eq!(<BasicSeries as SeriesAdapter>::len(&series), 0);
assert!(<BasicSeries as SeriesAdapter>::is_empty(&series));
}
// `as_chunked_slices` yields a single chunk covering all values.
#[test]
fn series_adapter_as_chunked_slice() {
let series = BasicSeries::new("test", vec![1usize, 2, 3]);
let chunked_slices = series.as_chunked_slices().unwrap();
assert_eq!(chunked_slices, ChunkedSlices::Usize(vec![&[1, 2, 3]]));
}
// Element-wise addition of two `i32` series; the result is named "unnamed".
#[test]
fn series_adapter_add() {
let series1 = BasicSeries::new("test1", vec![1i32, 2, 3]);
let series2 = BasicSeries::new("test2", vec![4i32, 5, 6]);
let result = series1 + series2;
assert!(result.is_ok());
let result_series = result.unwrap();
assert_eq!(result_series.name(), "unnamed");
assert_eq!(result_series.len(), 3);
assert_eq!(result_series.0.values, BasicTypedArray::I32(vec![5, 7, 9]));
}
// Element-wise subtraction of two `i32` series; the result is named "unnamed".
#[test]
fn series_adapter_sub() {
let series1 = BasicSeries::new("test1", vec![4i32, 5, 6]);
let series2 = BasicSeries::new("test2", vec![1i32, 2, 3]);
let result = series1 - series2;
assert!(result.is_ok());
let result_series = result.unwrap();
assert_eq!(result_series.name(), "unnamed");
assert_eq!(result_series.len(), 3);
assert_eq!(result_series.0.values, BasicTypedArray::I32(vec![3, 3, 3]));
}
// `BasicDataFrame::new` keeps the columns in order and reports the shape.
#[test]
fn basicdf_new() {
let series1 = BasicSeries::new("col1", vec![1usize, 2, 3]);
let series2 = BasicSeries::new("col2", vec![4usize, 5, 6]);
let df = BasicDataFrame::new(vec![series1, series2]);
assert_eq!(df.columns.len(), 2);
assert_eq!(df.columns[0].name(), "col1");
assert_eq!(df.columns[1].name(), "col2");
assert_eq!(df.shape(), (3, 2));
}
// The `basic_df!` macro (with a trailing comma) builds the same frame.
#[test]
fn basic_df_macro() {
let df = basic_df!(
"col1" => vec![1usize, 2, 3],
"col2" => vec![4usize, 5, 6],
);
assert_eq!(df.columns.len(), 2);
assert_eq!(df.columns[0].name(), "col1");
assert_eq!(df.columns[1].name(), "col2");
assert_eq!(df.shape(), (3, 2));
}
// `Display` prints tab-separated cells, one row per line, without a header.
#[test]
fn basicdf_display() {
let df = basic_df!(
"col1" => vec![1usize, 2, 3],
"col2" => vec![4usize, 5, 6],
);
let expected_output = "1\t4\n2\t5\n3\t6\n";
let output = df.to_string();
assert_eq!(output, expected_output);
}
// `column_names` returns every name, in column order.
#[test]
fn dataframe_adapter_column_names() {
let df = basic_df![
"alpha" => vec![1, 2, 3],
"beta" => vec![4, 5, 6],
"gamma" => vec![7, 8, 9]
];
let names = df.column_names();
assert_eq!(names.len(), 3);
assert!(names.contains(&"alpha".to_string()));
assert!(names.contains(&"beta".to_string()));
assert!(names.contains(&"gamma".to_string()));
assert_eq!(
names,
vec!["alpha".to_string(), "beta".to_string(), "gamma".to_string()]
);
}
// `col` finds existing columns and reports `ColumnNotFound` with the
// requested name otherwise.
#[test]
fn dataframe_adapter_col() {
let df = basic_df![
"numbers" => vec![10, 20, 30],
"letters" => vec!["x", "y", "z"]
];
let numbers_col = df.col("numbers").unwrap();
assert_eq!(numbers_col.name(), "numbers");
assert_eq!(numbers_col.len(), 3);
let letters_col = df.col("letters").unwrap();
assert_eq!(letters_col.name(), "letters");
assert_eq!(letters_col.len(), 3);
let result = df.col("nonexistent");
assert!(result.is_err());
if let Err(BasicdfError::ColumnNotFound(name)) = result {
assert_eq!(name, "nonexistent");
} else {
panic!("Expected ColumnNotFound error");
}
}
// `select` with several names, one name, no names, and a missing name.
#[test]
fn dataframe_adapter_select() {
let df = basic_df![
"a" => vec![1, 2, 3],
"b" => vec![4, 5, 6],
"c" => vec![7, 8, 9],
"d" => vec!["x", "y", "z"]
];
let selected = df.select(vec!["a", "c"]).unwrap();
assert_eq!(selected.shape(), (3, 2));
assert_eq!(
selected.column_names(),
vec!["a".to_string(), "c".to_string()]
);
let single = df.select(vec!["d"]).unwrap();
assert_eq!(single.shape(), (3, 1));
assert_eq!(single.column_names(), vec!["d".to_string()]);
let empty = df.select(vec![]).unwrap();
assert_eq!(empty.shape(), (0, 0));
let result = df.select(vec!["a", "nonexistent"]);
assert!(result.is_err());
if let Err(BasicdfError::ColumnNotFound(name)) = result {
assert_eq!(name, "nonexistent");
} else {
panic!("Expected ColumnNotFound error");
}
}
// `shape` for regular, empty, zero-row and uneven frames; with uneven
// columns the row count is the length of the longest column.
#[test]
fn dataframe_adapter_shape() {
let df = basic_df![
"col1" => vec![1, 2, 3, 4, 5],
"col2" => vec!["a", "b", "c", "d", "e"]
];
assert_eq!(df.shape(), (5, 2));
let empty_df = BasicDataFrame::new(vec![]);
assert_eq!(empty_df.shape(), (0, 0));
let empty_cols_df = basic_df!["empty" => vec![0i32; 0]];
assert_eq!(empty_cols_df.shape(), (0, 1));
let uneven = BasicDataFrame::new(vec![
BasicSeries(Arc::new(BasicNamedArray::new("short", vec![1, 2]))),
BasicSeries(Arc::new(BasicNamedArray::new("long", vec![1, 2, 3, 4, 5]))),
]);
assert_eq!(uneven.shape(), (5, 2)); }
// `basic_df!` with a single column, a trailing comma, and mixed element types.
#[test]
fn basic_df_macro_edge_cases() {
let df1 = basic_df!["single" => vec![42]];
assert_eq!(df1.columns.len(), 1);
assert_eq!(df1.col("single").unwrap().len(), 1);
let df2 = basic_df![
"a" => vec![1, 2],
"b" => vec![3, 4],
];
assert_eq!(df2.columns.len(), 2);
let df3 = basic_df![
"integers" => vec![1, 2, 3],
"floats" => vec![1.0f32, 2.0, 3.0],
"bools" => vec![true, false, true],
"strings" => vec!["x", "y", "z"]
];
assert_eq!(df3.columns.len(), 4);
assert_eq!(df3.shape(), (3, 4));
}
// A cloned series shares the same `Arc`; equality compares name and values.
#[test]
fn basic_series_clone_and_equality() {
let series1 = BasicSeries(Arc::new(BasicNamedArray::new("test", vec![1, 2, 3])));
let series2 = series1.clone();
assert_eq!(series1, series2);
assert!(Arc::ptr_eq(&series1.0, &series2.0));
let series3 = BasicSeries(Arc::new(BasicNamedArray::new("test", vec![1, 2, 4])));
assert_ne!(series1, series3);
let series4 = BasicSeries(Arc::new(BasicNamedArray::new("different", vec![1, 2, 3])));
assert_ne!(series1, series4);
}
// A cloned data frame equals the original; a differing value breaks equality.
#[test]
fn basic_dataframe_clone_and_equality() {
let df1 = basic_df!["col1" => vec![1, 2], "col2" => vec![3, 4]];
let df2 = df1.clone();
assert_eq!(df1, df2);
let df3 = basic_df!["col1" => vec![1, 2], "col2" => vec![3, 5]];
assert_ne!(df1, df3);
}
// Smoke test with a 1000-row column; checks shape and lookup only, no timing
// is measured.
#[test]
fn large_dataframe_performance() {
let large_vec: Vec<i32> = (0..1000).collect();
let df = basic_df!["large_col" => large_vec];
assert_eq!(df.shape(), (1000, 1));
assert_eq!(df.column_names(), vec!["large_col".to_string()]);
let col = df.col("large_col").unwrap();
assert_eq!(col.len(), 1000);
}
}