use alloc::{
boxed::Box,
format,
string::{String, ToString},
vec::Vec,
};
use core::ops::{Add, Sub};
use gguppy_core::data::{ChunkedSlices, DataFrameAdapter, SeriesAdapter, SeriesAdapterError};
use polars::prelude::*;
/// Newtype around a polars [`Series`] that implements [`SeriesAdapter`].
///
/// The wrapped series is the public tuple field `.0`.
pub struct PolarsSeriesAdapter(pub Series);
impl SeriesAdapter<'_> for PolarsSeriesAdapter {
    type LibError = PolarsError;

    /// Returns the name of the wrapped series.
    fn name(&self) -> &str {
        self.0.name()
    }

    /// Returns the number of elements in the wrapped series.
    fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when the wrapped series holds no elements.
    fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Exposes the series as one slice per underlying chunk.
    ///
    /// Supported dtypes are `Int32`, `Int64`, `Float32`, `Float64` and
    /// `Boolean`; each maps to the matching [`ChunkedSlices`] variant.
    ///
    /// # Errors
    ///
    /// * [`SeriesAdapterError::UnsupportedDataType`] (carrying the `Debug`
    ///   rendering of the dtype) for every other dtype.
    /// * [`SeriesAdapterError::DowncastError`] when the typed accessor for the
    ///   reported dtype fails.
    ///
    /// # Caveats
    ///
    /// * Only the value buffers are read; validity (null) information is not
    ///   consulted here, so callers cannot tell null slots apart from values.
    /// * `Boolean` chunks are copied element by element into freshly allocated
    ///   slices that are intentionally leaked with [`Box::leak`], so every call
    ///   on a boolean series permanently retains memory proportional to its
    ///   length.
    ///   NOTE(review): removing the leak needs an owner for the copied buffers,
    ///   which this adapter's public shape does not currently provide.
    fn as_chunked_slices(&self) -> Result<ChunkedSlices<'_>, SeriesAdapterError> {
        let dtype = self.0.dtype();

        // Single place that builds the downcast failure, shared by every arm.
        let downcast_error = |expected_type: &str, source: PolarsError| {
            SeriesAdapterError::DowncastError {
                expected_type: expected_type.to_string(),
                actual_type: format!("{:?}", dtype),
                reason: source.to_string(),
            }
        };

        match dtype {
            DataType::Int32 => {
                let ca = self.0.i32().map_err(|e| downcast_error("Int32", e))?;
                let chunks: Vec<&[i32]> = ca
                    .downcast_iter()
                    .map(|array| array.values().as_slice())
                    .collect();
                Ok(ChunkedSlices::I32(chunks))
            }
            DataType::Int64 => {
                let ca = self.0.i64().map_err(|e| downcast_error("Int64", e))?;
                let chunks: Vec<&[i64]> = ca
                    .downcast_iter()
                    .map(|array| array.values().as_slice())
                    .collect();
                Ok(ChunkedSlices::I64(chunks))
            }
            DataType::Float32 => {
                let ca = self.0.f32().map_err(|e| downcast_error("Float32", e))?;
                let chunks: Vec<&[f32]> = ca
                    .downcast_iter()
                    .map(|array| array.values().as_slice())
                    .collect();
                Ok(ChunkedSlices::F32(chunks))
            }
            DataType::Float64 => {
                let ca = self.0.f64().map_err(|e| downcast_error("Float64", e))?;
                let chunks: Vec<&[f64]> = ca
                    .downcast_iter()
                    .map(|array| array.values().as_slice())
                    .collect();
                Ok(ChunkedSlices::F64(chunks))
            }
            DataType::Boolean => {
                let ca = self.0.bool().map_err(|e| downcast_error("Boolean", e))?;
                let chunks: Vec<&[bool]> = ca
                    .downcast_iter()
                    .map(|array| {
                        // Presumably the boolean array cannot hand out a
                        // `&[bool]` directly, hence the element-wise copy.
                        let values: Vec<bool> =
                            (0..array.len()).map(|i| array.value(i)).collect();
                        // Deliberate leak: nothing in this adapter can own the
                        // copy for as long as the returned borrow lives.
                        let leaked: &[bool] = Box::leak(values.into_boxed_slice());
                        leaked
                    })
                    .collect();
                Ok(ChunkedSlices::Bool(chunks))
            }
            other => Err(SeriesAdapterError::UnsupportedDataType(format!(
                "{:?}",
                other
            ))),
        }
    }
}
impl Add for PolarsSeriesAdapter {
type Output = Result<Self, PolarsError>;
fn add(self, rhs: Self) -> Self::Output {
(&self.0 + &rhs.0).map(PolarsSeriesAdapter)
}
}
impl Sub for PolarsSeriesAdapter {
type Output = Result<Self, PolarsError>;
fn sub(self, rhs: Self) -> Self::Output {
(&self.0 - &rhs.0).map(PolarsSeriesAdapter)
}
}
/// Newtype around a polars [`DataFrame`] that implements [`DataFrameAdapter`].
///
/// The wrapped frame is the public tuple field `.0`.
pub struct PolarsDataFrameAdapter(pub DataFrame);
impl DataFrameAdapter<'_> for PolarsDataFrameAdapter {
type DataFrame = PolarsDataFrameAdapter;
type Series = PolarsSeriesAdapter;
type LibError = PolarsError;
fn column_names(&self) -> Vec<String> {
self.0
.get_column_names_owned()
.into_iter()
.map(|s| s.to_string())
.collect()
}
fn col(&self, name: &str) -> Result<Self::Series, Self::LibError> {
let column = self.0.column(name)?;
let series = column.as_materialized_series();
Ok(PolarsSeriesAdapter(series.clone()))
}
fn select(&self, names: Vec<&str>) -> Result<Self::DataFrame, Self::LibError> {
let df = self.0.select(names)?;
Ok(PolarsDataFrameAdapter(df))
}
fn shape(&self) -> (usize, usize) {
self.0.shape()
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that `outcome` is a `ColumnNotFound` error for the column
    /// named `nonexistent`.
    fn assert_nonexistent_column_error<T>(outcome: Result<T, PolarsError>) {
        assert!(outcome.is_err());
        assert!(matches!(outcome, Err(PolarsError::ColumnNotFound(_))));
        if let Err(PolarsError::ColumnNotFound(message)) = outcome {
            assert_eq!(message.to_string(), "\"nonexistent\" not found");
        }
    }

    /// The adapter reports the name of the wrapped series.
    #[test]
    fn series_adapter_name() {
        let adapter = PolarsSeriesAdapter(Series::new("test".into(), &[1, 2, 3]));
        assert_eq!(adapter.name(), "test");
    }

    /// A three-element series has length 3 and is not empty.
    #[test]
    fn series_adapter_len() {
        let adapter = PolarsSeriesAdapter(Series::new("test".into(), &[1, 2, 3]));
        assert_eq!(adapter.len(), 3);
        assert!(!adapter.is_empty());
    }

    /// A series built from an empty array has length 0 and is empty.
    #[test]
    fn series_adapter_empty() {
        let adapter = PolarsSeriesAdapter(Series::new("empty".into(), &[0f32; 0]));
        assert_eq!(adapter.len(), 0);
        assert!(adapter.is_empty());
    }

    /// An `i32` series is exposed as a single `I32` chunk with its values.
    #[test]
    fn series_adapter_as_chunked_slice() {
        let adapter = PolarsSeriesAdapter(Series::new("Int32".into(), vec![1i32, 2, 3]));
        let slices = adapter.as_chunked_slices();
        assert!(slices.is_ok());
        assert!(matches!(slices, Ok(ChunkedSlices::I32(_))));
        if let Ok(ChunkedSlices::I32(chunks)) = slices {
            assert_eq!(chunks.len(), 1);
            assert_eq!(chunks[0], &[1, 2, 3]);
        }
    }

    /// Adding two adapters yields the element-wise sum named after the left
    /// operand.
    #[test]
    fn series_adapter_add() {
        let left = PolarsSeriesAdapter(Series::new("a".into(), &[1, 2, 3]));
        let right = PolarsSeriesAdapter(Series::new("b".into(), &[4, 5, 6]));
        let outcome = left + right;
        assert!(outcome.is_ok());
        let sum = outcome.unwrap();
        assert_eq!(sum.name(), "a");
        assert_eq!(sum.len(), 3);
        assert!(matches!(sum.0.dtype(), DataType::Int32));
        let values = sum
            .0
            .i32()
            .unwrap()
            .into_no_null_iter()
            .collect::<Vec<_>>();
        assert_eq!(values, vec![5, 7, 9]);
    }

    /// Subtracting two adapters yields the element-wise difference named
    /// after the left operand.
    #[test]
    fn series_adapter_sub() {
        let left = PolarsSeriesAdapter(Series::new("a".into(), &[4, 5, 6]));
        let right = PolarsSeriesAdapter(Series::new("b".into(), &[1, 2, 3]));
        let outcome = left - right;
        assert!(outcome.is_ok());
        let difference = outcome.unwrap();
        assert_eq!(difference.name(), "a");
        assert_eq!(difference.len(), 3);
        let values = difference
            .0
            .i32()
            .unwrap()
            .into_no_null_iter()
            .collect::<Vec<_>>();
        assert_eq!(values, vec![3, 3, 3]);
    }

    /// Column names come back as owned strings in frame order.
    #[test]
    fn dataframe_adapter_column_names() {
        let frame = df!(
            "alpha" => [1, 2, 3],
            "beta" => [4, 5, 6],
            "gamma" => [7, 8, 9]
        )
        .unwrap();
        let adapter = PolarsDataFrameAdapter(frame);
        let names = adapter.column_names();
        assert_eq!(names.len(), 3);
        assert!(names.contains(&"alpha".to_string()));
        assert!(names.contains(&"beta".to_string()));
        assert!(names.contains(&"gamma".to_string()));
        assert_eq!(
            names,
            vec!["alpha".to_string(), "beta".to_string(), "gamma".to_string()]
        );
    }

    /// `col` returns existing columns and reports missing ones as
    /// `ColumnNotFound`.
    #[test]
    fn dataframe_adapter_col() {
        let frame = df![
            "numbers" => [10, 20, 30],
            "letters" => ["x", "y", "z"]
        ]
        .unwrap();
        let adapter = PolarsDataFrameAdapter(frame);

        let numbers = adapter.col("numbers").unwrap();
        assert_eq!(numbers.name(), "numbers");
        assert_eq!(numbers.len(), 3);

        let letters = adapter.col("letters").unwrap();
        assert_eq!(letters.name(), "letters");
        assert_eq!(letters.len(), 3);

        assert_nonexistent_column_error(adapter.col("nonexistent"));
    }

    /// `select` keeps the requested columns and reports missing ones as
    /// `ColumnNotFound`.
    #[test]
    fn dataframe_adapter_select() {
        let frame = df![
            "a" => [1, 2, 3],
            "b" => [4, 5, 6],
            "c" => [7, 8, 9],
            "d" => ["x", "y", "z"]
        ]
        .unwrap();
        let adapter = PolarsDataFrameAdapter(frame);

        let pair = adapter.select(vec!["a", "c"]).unwrap();
        assert_eq!(pair.shape(), (3, 2));
        assert_eq!(
            pair.column_names(),
            vec!["a".to_string(), "c".to_string()]
        );

        let single = adapter.select(vec!["d"]).unwrap();
        assert_eq!(single.shape(), (3, 1));
        assert_eq!(single.column_names(), vec!["d".to_string()]);

        let none = adapter.select(vec![]).unwrap();
        assert_eq!(none.shape(), (3, 0));

        assert_nonexistent_column_error(adapter.select(vec!["a", "nonexistent"]));
    }
}