gguppy_data 0.4.0

Traits and adapters used by gguppy for dataframe access
Documentation
//! Wrappers for inbuilt data structures from the Rust standard library
//!
//! This entire module is behind a `std` feature flag.
//!
//! ## Design Philosophy
//!
//! - Functions that directly convert a `HashMap` or `BTreeMap` into a `DataFrame`.
//!   These take ownership of the map rather than borrowing it, on the assumption that you want
//!   to continue the rest of your work in `gguppy` with the `DataFrame` rather than with the
//!   original `HashMap` or `BTreeMap`.
//! - Users should then be able to convert from the `DataFrame` to any other supported
//!   `ggconsumable` data structure of their choice.
//!
//! ## Future Work
//!
//! - [ ] Conversion from struct
//! - [x] Conversion from `std::collections::HashMap` (see `hashmap_to_dataframe`)
//! - [x] Conversion from `std::collections::BTreeMap` (see `btreemap_to_dataframe`)

extern crate std;
use alloc::{string::ToString, sync::Arc, vec, vec::Vec};
use std::collections::{BTreeMap, HashMap};

use crate::basicdf::{BasicDataFrame, BasicNamedArray, BasicSeries, FromNamedArray};

/// Converts a [`HashMap`] into a [`BasicDataFrame`], consuming the map.
///
/// Every entry becomes one column: the key's `ToString` output is used as the
/// column name and the value is stored as that column's only element.
///
/// `HashMap` iteration order is unspecified, so the order of the resulting
/// columns is unspecified as well.
#[must_use]
pub fn hashmap_to_dataframe<K, V: 'static, S>(map: HashMap<K, V, S>) -> BasicDataFrame
where
    BasicNamedArray: FromNamedArray<V>,
    K: 'static + ToString,
{
    // Each column wraps a one-element vector, so all columns share the same length.
    let series: Vec<BasicSeries> = map
        .into_iter()
        .map(|(name, cell)| {
            let label = name.to_string();
            BasicSeries(Arc::new(BasicNamedArray::new(&label, vec![cell])))
        })
        .collect();
    BasicDataFrame::new(series)
}

/// Converts a [`BTreeMap`] into a [`BasicDataFrame`], consuming the map.
///
/// Every entry becomes one column: the key's `ToString` output is used as the
/// column name and the value is stored as that column's only element.
///
/// Columns are produced in the map's iteration order, i.e. ascending by key
/// (according to the key type's ordering, not the ordering of the generated
/// column names).
#[must_use]
pub fn btreemap_to_dataframe<K, V: 'static>(map: BTreeMap<K, V>) -> BasicDataFrame
where
    BasicNamedArray: FromNamedArray<V>,
    K: 'static + ToString,
{
    // Each column wraps a one-element vector, so all columns share the same length.
    let series: Vec<BasicSeries> = map
        .into_iter()
        .map(|(name, cell)| {
            let label = name.to_string();
            let array = BasicNamedArray::new(&label, vec![cell]);
            BasicSeries(Arc::new(array))
        })
        .collect();
    BasicDataFrame::new(series)
}

#[cfg(test)]
mod tests {
    use alloc::sync::Arc;
    use core::f32;

    use gguppy_core::data::{DataFrameAdapter, SeriesAdapter};

    use super::*;

    /// A three-entry integer `HashMap` yields three single-element columns.
    #[test]
    fn test_hashmap_to_dataframe() {
        let source = HashMap::from([("key1", 1), ("key2", 2), ("key3", 3)]);

        let reference = BasicDataFrame::new(vec![
            BasicSeries(Arc::new(BasicNamedArray::new("key1", vec![1]))),
            BasicSeries(Arc::new(BasicNamedArray::new("key2", vec![2]))),
            BasicSeries(Arc::new(BasicNamedArray::new("key3", vec![3]))),
        ]);

        let frame = hashmap_to_dataframe(source);

        // Column order is not guaranteed for a HashMap, so only order-independent
        // properties are compared against the reference frame.
        assert_eq!(frame.columns.len(), reference.columns.len());
        assert_eq!(frame.shape(), (1, 3));

        // Every key must show up as a column name.
        let names = frame.column_names();
        for expected in ["key1", "key2", "key3"] {
            assert!(names.contains(&expected.to_string()));
        }

        // A column can be looked up by name and reports its own name and length.
        let first = frame.col("key1").unwrap();
        assert_eq!(first.name(), "key1");
        assert_eq!(first.len(), 1);
    }

    /// `hashmap_to_dataframe` accepts string, float and boolean values.
    #[test]
    fn test_hashmap_to_dataframe_different_types() {
        // String values
        let text_frame = hashmap_to_dataframe(HashMap::from([
            ("name", "Alice"),
            ("city", "Boston"),
        ]));
        assert_eq!(text_frame.shape(), (1, 2));

        // Float values
        let real_frame = hashmap_to_dataframe(HashMap::from([
            ("pi", f32::consts::PI),
            ("e", f32::consts::E),
        ]));
        assert_eq!(real_frame.shape(), (1, 2));

        // Boolean values
        let flag_frame = hashmap_to_dataframe(HashMap::from([
            ("is_valid", true),
            ("is_complete", false),
        ]));
        assert_eq!(flag_frame.shape(), (1, 2));
    }

    /// An empty `HashMap` produces a frame with no columns.
    #[test]
    fn test_hashmap_empty() {
        let nothing: HashMap<&str, i32> = HashMap::new();
        let frame = hashmap_to_dataframe(nothing);
        assert_eq!(frame.shape(), (0, 0));
        assert!(frame.columns.is_empty());
    }

    /// A `BTreeMap` converts to exactly the frame built by hand in key order.
    #[test]
    fn test_btreemap_to_dataframe() {
        let source = BTreeMap::from([("key1", 1), ("key2", 2), ("key3", 3)]);

        let reference = BasicDataFrame::new(vec![
            BasicSeries(Arc::new(BasicNamedArray::new("key1", vec![1]))),
            BasicSeries(Arc::new(BasicNamedArray::new("key2", vec![2]))),
            BasicSeries(Arc::new(BasicNamedArray::new("key3", vec![3]))),
        ]);

        let frame = btreemap_to_dataframe(source);

        assert_eq!(frame, reference);
        assert_eq!(frame.shape(), (1, 3));

        // BTreeMap iterates in key order, so the exact column order can be asserted.
        assert_eq!(frame.column_names(), vec!["key1", "key2", "key3"]);
    }

    /// `btreemap_to_dataframe` accepts `char` values and non-string keys.
    #[test]
    fn test_btreemap_to_dataframe_different_types() {
        // Character values
        let letters = BTreeMap::from([("first", 'a'), ("second", 'b')]);
        let letter_frame = btreemap_to_dataframe(letters);
        assert_eq!(letter_frame.shape(), (1, 2));
        assert_eq!(letter_frame.column_names(), vec!["first", "second"]);

        // Numeric keys are turned into their string form for the column names.
        let numbered = BTreeMap::from([(1, "one"), (2, "two")]);
        let numbered_frame = btreemap_to_dataframe(numbered);
        assert_eq!(numbered_frame.shape(), (1, 2));
        assert_eq!(numbered_frame.column_names(), vec!["1", "2"]);
    }

    /// An empty `BTreeMap` produces a frame with no columns.
    #[test]
    fn test_btreemap_empty() {
        let nothing: BTreeMap<&str, i32> = BTreeMap::new();
        let frame = btreemap_to_dataframe(nothing);
        assert_eq!(frame.shape(), (0, 0));
        assert!(frame.columns.is_empty());
    }

    /// A one-entry `BTreeMap` produces a single named column of length one.
    #[test]
    fn test_btreemap_single_entry() {
        let lone = BTreeMap::from([("only", 42)]);

        let frame = btreemap_to_dataframe(lone);
        assert_eq!(frame.shape(), (1, 1));
        assert_eq!(frame.column_names(), vec!["only"]);

        let column = frame.col("only").unwrap();
        assert_eq!(column.name(), "only");
        assert_eq!(column.len(), 1);
    }

    /// Frames produced from maps support selection and by-name column lookup.
    #[test]
    fn test_map_to_dataframe_operations() {
        let source = BTreeMap::from([("alpha", 100), ("beta", 200), ("gamma", 300)]);
        let frame = btreemap_to_dataframe(source);

        // Selecting a subset of columns keeps only those columns.
        let subset = frame.select(vec!["alpha", "gamma"]).unwrap();
        assert_eq!(subset.shape(), (1, 2));
        assert_eq!(subset.column_names(), vec!["alpha", "gamma"]);

        // A selected column is still reachable by name.
        let alpha = subset.col("alpha").unwrap();
        assert_eq!(alpha.name(), "alpha");

        // Looking up a missing column is reported as an error.
        assert!(subset.col("nonexistent").is_err());
    }

    /// Both string and integer keys end up as string column names.
    #[test]
    fn test_key_string_conversion() {
        let by_text = BTreeMap::from([("string_key", 1)]);
        let by_number = BTreeMap::from([(123, "value")]);

        let text_frame = btreemap_to_dataframe(by_text);
        let number_frame = btreemap_to_dataframe(by_number);

        assert_eq!(text_frame.column_names(), vec!["string_key"]);
        assert_eq!(number_frame.column_names(), vec!["123"]);
    }

    /// A 100-entry map converts into 100 columns that remain addressable by name.
    #[test]
    fn test_large_map_conversion() {
        let wide: BTreeMap<_, _> = (0..100).map(|i| (format!("col_{i}"), i)).collect();

        let frame = btreemap_to_dataframe(wide);
        assert_eq!(frame.shape(), (1, 100));

        // Spot-check one column in the middle of the range.
        let middle = frame.col("col_50").unwrap();
        assert_eq!(middle.name(), "col_50");
        assert_eq!(middle.len(), 1);
    }
}