gguppy_data 0.4.0

Traits and adapters used by gguppy for dataframe access
Documentation
//! Static DataFrame-like abstraction for `gguppy`
//!
//! - Provides the [`StaticDataFrame`] and [`StaticTypedArray`] types
//! - The [`StaticDataFrame`] represents a tabular data structure in columnar format. It is a
//!   const generic struct with `ROWS` and `COLS` const generic parameters of type `usize`. This
//!   enables the definition of `static` and `const` `DataFrame` structures which represent fixed
//!   data
//!
//! ## Design Philosophy
//!
//! - Smallest possible feature set to enable sample data (e.g. mtcars) definitions with `static`.
//! - `static` is preferred over `const` because the compiler inlines const variables upon use.
//!   Static items result in only one instance and have a fixed location in memory.
//! - Converts to a [`BasicDataFrame`] and [`BasicSeries`] for DataFrame-like features

use alloc::{
    string::{String, ToString},
    sync::Arc,
    vec::Vec,
};

use gguppy_core::data::DataFrameAdapter;

use crate::basicdf::{BasicDataFrame, BasicNamedArray, BasicSeries, BasicTypedArray, BasicdfError};

/// A single column of exactly `N` values, stored inline as a fixed-size array.
///
/// Each variant fixes the element type of the column. The payloads are plain arrays, so a
/// value can be placed directly in a `static` item.
#[derive(Debug)]
pub enum StaticTypedArray<const N: usize> {
    /// Column of `i32` values.
    I32([i32; N]),
    /// Column of `f32` values.
    F32([f32; N]),
    /// Column of string slices with `'static` lifetime.
    String([&'static str; N]),
}

/// Fixed-size, column-oriented table with `ROWS` rows and `COLS` columns.
///
/// `columns[i]` holds the data of the column called `names[i]`; the two arrays are paired by
/// index.
#[derive(Debug)]
pub struct StaticDataFrame<const ROWS: usize, const COLS: usize> {
    /// Column data; every column holds exactly `ROWS` values.
    pub columns: [StaticTypedArray<ROWS>; COLS],
    /// Column names, in the same order as `columns`.
    pub names: [&'static str; COLS],
}

impl<const N: usize> StaticTypedArray<N> {
    /// Build an owned [`BasicSeries`] labelled `name` from this fixed-size column.
    ///
    /// The elements are copied into a newly collected container; string slices become owned
    /// `String`s. `self` is only borrowed and stays usable afterwards.
    fn to_basic_series(&self, name: &str) -> BasicSeries {
        let values = match self {
            Self::I32(ints) => BasicTypedArray::I32(ints.iter().copied().collect()),
            Self::F32(floats) => BasicTypedArray::F32(floats.iter().copied().collect()),
            Self::String(texts) => {
                BasicTypedArray::String(texts.iter().copied().map(String::from).collect())
            }
        };
        let named = BasicNamedArray {
            values,
            name: String::from(name),
        };
        BasicSeries(Arc::new(named))
    }
}

impl<const ROWS: usize, const COLS: usize> DataFrameAdapter<'_> for StaticDataFrame<ROWS, COLS> {
    type DataFrame = BasicDataFrame;
    type Series = BasicSeries;
    type LibError = BasicdfError;

    /// Owned copies of all column names, in declaration order.
    fn column_names(&self) -> Vec<String> {
        self.names.iter().copied().map(String::from).collect()
    }

    /// Look up a column by name and convert it to a [`BasicSeries`].
    ///
    /// If several columns share the same name, the first one is returned.
    ///
    /// # Errors
    ///
    /// Returns [`BasicdfError::ColumnNotFound`] carrying `name` when no column matches.
    fn col(&self, name: &str) -> Result<Self::Series, Self::LibError> {
        // `names` and `columns` both have `COLS` entries, so zipping pairs each name with
        // its data.
        let hit = self
            .names
            .iter()
            .zip(self.columns.iter())
            .find(|(label, _)| **label == name);
        match hit {
            Some((label, column)) => Ok(column.to_basic_series(label)),
            None => Err(BasicdfError::ColumnNotFound(name.to_string())),
        }
    }

    /// Collect the requested columns, in the order given, into a [`BasicDataFrame`].
    ///
    /// # Errors
    ///
    /// Stops at the first name that cannot be found and returns that lookup's error.
    fn select(&self, names: Vec<&str>) -> Result<Self::DataFrame, Self::LibError> {
        let mut picked: Vec<BasicSeries> = Vec::with_capacity(names.len());
        for name in names {
            picked.push(self.col(name)?);
        }
        Ok(BasicDataFrame::new(picked))
    }

    /// `(rows, columns)`, taken directly from the const generic parameters.
    fn shape(&self) -> (usize, usize) {
        (ROWS, COLS)
    }
}

#[cfg(test)]
mod tests {
    use core::f32;

    use gguppy_core::data::{DataFrameAdapter, SeriesAdapter};

    use super::*;

    /// Shared fixture: three rows with one integer column and one string column.
    static TEST_STATIC_DF: StaticDataFrame<3, 2> = StaticDataFrame {
        names: ["numbers", "letters"],
        columns: [
            StaticTypedArray::I32([1, 2, 3]),
            StaticTypedArray::String(["a", "b", "c"]),
        ],
    };

    /// Each variant converts to the matching `BasicTypedArray` variant and keeps its name.
    #[test]
    fn static_typed_array_to_basic_series() {
        // Integer column
        let ints = StaticTypedArray::I32([10, 20, 30]).to_basic_series("test_col");
        assert_eq!(ints.name(), "test_col");
        assert_eq!(ints.len(), 3);
        match &ints.values {
            BasicTypedArray::I32(values) => assert_eq!(*values, vec![10, 20, 30]),
            _ => panic!("Expected I32 array"),
        }

        // Float column
        let floats = StaticTypedArray::F32([1.5, 2.5, 3.5]).to_basic_series("floats");
        assert_eq!(floats.name(), "floats");
        match &floats.values {
            BasicTypedArray::F32(values) => assert_eq!(*values, vec![1.5, 2.5, 3.5]),
            _ => panic!("Expected F32 array"),
        }

        // String column
        let texts = StaticTypedArray::String(["hello", "world", "test"]).to_basic_series("text");
        assert_eq!(texts.name(), "text");
        match &texts.values {
            BasicTypedArray::String(values) => assert_eq!(
                *values,
                vec![
                    String::from("hello"),
                    String::from("world"),
                    String::from("test")
                ]
            ),
            _ => panic!("Expected String array"),
        }
    }

    /// Names come back as owned strings in declaration order.
    #[test]
    fn static_dataframe_adapter_column_names() {
        let expected = vec![String::from("numbers"), String::from("letters")];
        assert_eq!(TEST_STATIC_DF.column_names(), expected);
    }

    /// `col` returns the named column, or `ColumnNotFound` for an unknown name.
    #[test]
    fn static_dataframe_adapter_col() {
        // Lookup of the integer column
        let numbers_col = TEST_STATIC_DF.col("numbers").unwrap();
        assert_eq!(numbers_col.name(), "numbers");
        assert_eq!(numbers_col.len(), 3);
        match &numbers_col.values {
            BasicTypedArray::I32(values) => assert_eq!(*values, vec![1, 2, 3]),
            _ => panic!("Expected I32 array"),
        }

        // Lookup of the string column
        let letters_col = TEST_STATIC_DF.col("letters").unwrap();
        assert_eq!(letters_col.name(), "letters");
        assert_eq!(letters_col.len(), 3);
        match &letters_col.values {
            BasicTypedArray::String(values) => assert_eq!(
                *values,
                vec![String::from("a"), String::from("b"), String::from("c")]
            ),
            _ => panic!("Expected String array"),
        }

        // Unknown column name
        let missing = TEST_STATIC_DF.col("nonexistent");
        assert!(missing.is_err());
        match missing {
            Err(BasicdfError::ColumnNotFound(name)) => assert_eq!(name, "nonexistent"),
            _ => panic!("Expected ColumnNotFound error"),
        }
    }

    /// `select` keeps the requested order, accepts an empty list, and fails on unknown names.
    #[test]
    fn static_dataframe_adapter_select() {
        // One column
        let selected = TEST_STATIC_DF.select(vec!["numbers"]).unwrap();
        assert_eq!(selected.columns.len(), 1);
        assert_eq!(selected.column_names(), vec![String::from("numbers")]);

        // Both columns
        let both_selected = TEST_STATIC_DF.select(vec!["numbers", "letters"]).unwrap();
        assert_eq!(both_selected.columns.len(), 2);
        assert_eq!(
            both_selected.column_names(),
            vec![String::from("numbers"), String::from("letters")]
        );

        // No columns at all
        let empty = TEST_STATIC_DF.select(vec![]).unwrap();
        assert_eq!(empty.columns.len(), 0);

        // One valid name followed by an unknown one
        let failed = TEST_STATIC_DF.select(vec!["numbers", "nonexistent"]);
        assert!(failed.is_err());
        match failed {
            Err(BasicdfError::ColumnNotFound(name)) => assert_eq!(name, "nonexistent"),
            _ => panic!("Expected ColumnNotFound error"),
        }
    }

    /// `shape` reports the const generic dimensions, including the zero-row case.
    #[test]
    fn static_dataframe_adapter_shape() {
        // Smallest non-empty frame: one row, one column
        static SINGLE_CELL: StaticDataFrame<1, 1> = StaticDataFrame {
            names: ["single"],
            columns: [StaticTypedArray::I32([42])],
        };
        // Columns declared but no rows
        static EMPTY_ROWS: StaticDataFrame<0, 2> = StaticDataFrame {
            names: ["col1", "col2"],
            columns: [StaticTypedArray::I32([]), StaticTypedArray::String([])],
        };

        assert_eq!(TEST_STATIC_DF.shape(), (3, 2));
        assert_eq!(SINGLE_CELL.shape(), (1, 1));
        assert_eq!(EMPTY_ROWS.shape(), (0, 2));
    }

    /// A frame with other const generic sizes and all three column types works the same way.
    #[test]
    fn static_dataframe_different_sizes() {
        static LARGE_DF: StaticDataFrame<5, 3> = StaticDataFrame {
            names: ["integers", "floats", "strings"],
            columns: [
                StaticTypedArray::I32([1, 2, 3, 4, 5]),
                StaticTypedArray::F32([1.1, 2.2, 3.3, 4.4, 5.5]),
                StaticTypedArray::String(["alpha", "beta", "gamma", "delta", "epsilon"]),
            ],
        };

        assert_eq!(LARGE_DF.shape(), (5, 3));
        assert_eq!(LARGE_DF.column_names().len(), 3);

        // Every column must report all five rows
        for column in ["integers", "floats", "strings"] {
            assert_eq!(LARGE_DF.col(column).unwrap().len(), 5);
        }
    }

    /// The result of `select` is a `BasicDataFrame` that supports the adapter operations.
    #[test]
    fn static_vs_basic_dataframe_compatibility() {
        let selected = TEST_STATIC_DF.select(vec!["numbers"]).unwrap();
        assert_eq!(selected.columns.len(), 1);

        // Adapter operations on the selected frame
        let col = selected.col("numbers").unwrap();
        assert_eq!(col.name(), "numbers");
        assert_eq!(col.len(), 3);

        // Three rows survive, one column was selected
        assert_eq!(selected.shape(), (3, 1));
    }

    /// Values survive the conversion from static arrays to the basic array types.
    #[test]
    fn static_dataframe_type_conversion_accuracy() {
        let static_df: StaticDataFrame<2, 3> = StaticDataFrame {
            names: ["ints", "floats", "texts"],
            columns: [
                StaticTypedArray::I32([100, 200]),
                StaticTypedArray::F32([f32::consts::PI, f32::consts::E]),
                StaticTypedArray::String(["static", "conversion"]),
            ],
        };

        // Integers
        let int_series = static_df.col("ints").unwrap();
        match &int_series.values {
            BasicTypedArray::I32(values) => assert_eq!(*values, vec![100, 200]),
            _ => panic!("Expected I32 conversion"),
        }

        // Floats, compared with a tolerance
        let float_series = static_df.col("floats").unwrap();
        match &float_series.values {
            BasicTypedArray::F32(values) => {
                assert!((values[0] - f32::consts::PI).abs() < f32::EPSILON);
                assert!((values[1] - f32::consts::E).abs() < f32::EPSILON);
            }
            _ => panic!("Expected F32 conversion"),
        }

        // Strings
        let str_series = static_df.col("texts").unwrap();
        match &str_series.values {
            BasicTypedArray::String(values) => {
                assert_eq!(values[0], "static");
                assert_eq!(values[1], "conversion");
            }
            _ => panic!("Expected String conversion"),
        }
    }

    /// Both public types implement `Debug` and mention their type, field and variant names.
    #[test]
    fn static_dataframe_debug_display() {
        let debug_str = format!("{TEST_STATIC_DF:?}");
        for needle in ["StaticDataFrame", "columns", "names"] {
            assert!(debug_str.contains(needle));
        }

        let array = StaticTypedArray::I32([1, 2, 3]);
        let array_debug = format!("{array:?}");
        assert!(array_debug.contains("I32"));
    }
}