xdl_dataframe/
series.rs

1//! Series - a single column of data
2
3use crate::error::{DataFrameError, DataFrameResult};
4use std::collections::HashMap;
5use xdl_core::XdlValue;
6
7/// Series - represents a single column of data
8#[derive(Debug, Clone)]
9pub struct Series {
10    data: Vec<XdlValue>,
11}
12
13impl Series {
14    /// Create a new Series from a vector
15    pub fn from_vec(data: Vec<XdlValue>) -> DataFrameResult<Self> {
16        Ok(Self { data })
17    }
18
19    /// Get length
20    pub fn len(&self) -> usize {
21        self.data.len()
22    }
23
24    /// Check if empty
25    pub fn is_empty(&self) -> bool {
26        self.data.is_empty()
27    }
28
29    /// Get value at index
30    pub fn get(&self, index: usize) -> DataFrameResult<&XdlValue> {
31        self.data
32            .get(index)
33            .ok_or(DataFrameError::IndexOutOfBounds(index, self.data.len()))
34    }
35
36    /// Get data type as string
37    pub fn dtype(&self) -> String {
38        if self.data.is_empty() {
39            return "undefined".to_string();
40        }
41
42        // Determine predominant type
43        let first_type = format!("{:?}", self.data[0].gdl_type());
44        first_type
45    }
46
47    /// Head - get first n elements
48    pub fn head(&self, n: usize) -> DataFrameResult<Self> {
49        let n = n.min(self.data.len());
50        Ok(Self {
51            data: self.data[..n].to_vec(),
52        })
53    }
54
55    /// Tail - get last n elements
56    pub fn tail(&self, n: usize) -> DataFrameResult<Self> {
57        let n = n.min(self.data.len());
58        let start = self.data.len() - n;
59        Ok(Self {
60            data: self.data[start..].to_vec(),
61        })
62    }
63
64    /// Describe - get statistical summary for numeric series
65    pub fn describe(&self) -> DataFrameResult<HashMap<String, f64>> {
66        let nums: Vec<f64> = self
67            .data
68            .iter()
69            .filter_map(|v| v.to_double().ok())
70            .collect();
71
72        if nums.is_empty() {
73            return Err(DataFrameError::InvalidOperation(
74                "Cannot describe non-numeric series".to_string(),
75            ));
76        }
77
78        let mut stats = HashMap::new();
79        stats.insert("count".to_string(), nums.len() as f64);
80
81        let sum: f64 = nums.iter().sum();
82        let mean = sum / nums.len() as f64;
83        stats.insert("mean".to_string(), mean);
84
85        let mut sorted = nums.clone();
86        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
87
88        stats.insert("min".to_string(), sorted[0]);
89        stats.insert("max".to_string(), sorted[sorted.len() - 1]);
90
91        // Median
92        let mid = sorted.len() / 2;
93        let median = if sorted.len().is_multiple_of(2) {
94            (sorted[mid - 1] + sorted[mid]) / 2.0
95        } else {
96            sorted[mid]
97        };
98        stats.insert("median".to_string(), median);
99
100        // Standard deviation
101        let variance = nums.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / nums.len() as f64;
102        stats.insert("std".to_string(), variance.sqrt());
103
104        Ok(stats)
105    }
106
107    /// Sum of numeric values
108    pub fn sum(&self) -> DataFrameResult<f64> {
109        let sum: f64 = self.data.iter().filter_map(|v| v.to_double().ok()).sum();
110        Ok(sum)
111    }
112
113    /// Mean of numeric values
114    pub fn mean(&self) -> DataFrameResult<f64> {
115        let nums: Vec<f64> = self
116            .data
117            .iter()
118            .filter_map(|v| v.to_double().ok())
119            .collect();
120
121        if nums.is_empty() {
122            return Err(DataFrameError::InvalidOperation(
123                "Cannot compute mean of empty or non-numeric series".to_string(),
124            ));
125        }
126
127        Ok(nums.iter().sum::<f64>() / nums.len() as f64)
128    }
129
130    /// Get unique values
131    pub fn unique(&self) -> Vec<XdlValue> {
132        let mut unique_values = Vec::new();
133        let mut seen = std::collections::HashSet::new();
134
135        for value in &self.data {
136            let key = value.to_string_repr();
137            if seen.insert(key) {
138                unique_values.push(value.clone());
139            }
140        }
141
142        unique_values
143    }
144
145    /// Count of values
146    pub fn count(&self) -> usize {
147        self.data.len()
148    }
149
150    /// Value counts - return counts of unique values
151    pub fn value_counts(&self) -> HashMap<String, usize> {
152        let mut counts = HashMap::new();
153
154        for value in &self.data {
155            let key = value.to_string_repr();
156            *counts.entry(key).or_insert(0) += 1;
157        }
158
159        counts
160    }
161
162    /// Apply a function to each element
163    pub fn map<F>(&self, f: F) -> DataFrameResult<Self>
164    where
165        F: Fn(&XdlValue) -> XdlValue,
166    {
167        let mapped_data: Vec<XdlValue> = self.data.iter().map(f).collect();
168        Self::from_vec(mapped_data)
169    }
170
171    /// Filter elements based on predicate
172    pub fn filter<F>(&self, predicate: F) -> DataFrameResult<Self>
173    where
174        F: Fn(&XdlValue) -> bool,
175    {
176        let filtered_data: Vec<XdlValue> =
177            self.data.iter().filter(|v| predicate(v)).cloned().collect();
178        Self::from_vec(filtered_data)
179    }
180
181    /// Get the underlying data vector
182    pub fn data(&self) -> &[XdlValue] {
183        &self.data
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// Three doubles (1.0, 2.0, 3.0) shared by the aggregate tests.
    fn one_two_three() -> Series {
        Series::from_vec(vec![
            XdlValue::Double(1.0),
            XdlValue::Double(2.0),
            XdlValue::Double(3.0),
        ])
        .unwrap()
    }

    /// A series built from three values reports a length of three.
    #[test]
    fn test_series_creation() {
        let series =
            Series::from_vec(vec![XdlValue::Long(1), XdlValue::Long(2), XdlValue::Long(3)])
                .unwrap();
        assert_eq!(series.len(), 3);
    }

    /// Index 0 yields the first element that was stored.
    #[test]
    fn test_series_get() {
        let series = Series::from_vec(vec![XdlValue::Long(1), XdlValue::Long(2)]).unwrap();
        let first = series.get(0);
        assert!(matches!(first, Ok(XdlValue::Long(1))));
    }

    /// Summing 1.0, 2.0 and 3.0 gives 6.0.
    #[test]
    fn test_series_sum() {
        let total = one_two_three().sum().unwrap();
        assert_eq!(total, 6.0);
    }

    /// The mean of 1.0, 2.0 and 3.0 is 2.0.
    #[test]
    fn test_series_mean() {
        let average = one_two_three().mean().unwrap();
        assert_eq!(average, 2.0);
    }
}