1use crate::error::{DataFrameError, DataFrameResult};
4use std::collections::HashMap;
5use xdl_core::XdlValue;
6
7#[derive(Debug, Clone)]
9pub struct Series {
10 data: Vec<XdlValue>,
11}
12
13impl Series {
14 pub fn from_vec(data: Vec<XdlValue>) -> DataFrameResult<Self> {
16 Ok(Self { data })
17 }
18
19 pub fn len(&self) -> usize {
21 self.data.len()
22 }
23
24 pub fn is_empty(&self) -> bool {
26 self.data.is_empty()
27 }
28
29 pub fn get(&self, index: usize) -> DataFrameResult<&XdlValue> {
31 self.data
32 .get(index)
33 .ok_or(DataFrameError::IndexOutOfBounds(index, self.data.len()))
34 }
35
36 pub fn dtype(&self) -> String {
38 if self.data.is_empty() {
39 return "undefined".to_string();
40 }
41
42 let first_type = format!("{:?}", self.data[0].gdl_type());
44 first_type
45 }
46
47 pub fn head(&self, n: usize) -> DataFrameResult<Self> {
49 let n = n.min(self.data.len());
50 Ok(Self {
51 data: self.data[..n].to_vec(),
52 })
53 }
54
55 pub fn tail(&self, n: usize) -> DataFrameResult<Self> {
57 let n = n.min(self.data.len());
58 let start = self.data.len() - n;
59 Ok(Self {
60 data: self.data[start..].to_vec(),
61 })
62 }
63
64 pub fn describe(&self) -> DataFrameResult<HashMap<String, f64>> {
66 let nums: Vec<f64> = self
67 .data
68 .iter()
69 .filter_map(|v| v.to_double().ok())
70 .collect();
71
72 if nums.is_empty() {
73 return Err(DataFrameError::InvalidOperation(
74 "Cannot describe non-numeric series".to_string(),
75 ));
76 }
77
78 let mut stats = HashMap::new();
79 stats.insert("count".to_string(), nums.len() as f64);
80
81 let sum: f64 = nums.iter().sum();
82 let mean = sum / nums.len() as f64;
83 stats.insert("mean".to_string(), mean);
84
85 let mut sorted = nums.clone();
86 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
87
88 stats.insert("min".to_string(), sorted[0]);
89 stats.insert("max".to_string(), sorted[sorted.len() - 1]);
90
91 let mid = sorted.len() / 2;
93 let median = if sorted.len().is_multiple_of(2) {
94 (sorted[mid - 1] + sorted[mid]) / 2.0
95 } else {
96 sorted[mid]
97 };
98 stats.insert("median".to_string(), median);
99
100 let variance = nums.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / nums.len() as f64;
102 stats.insert("std".to_string(), variance.sqrt());
103
104 Ok(stats)
105 }
106
107 pub fn sum(&self) -> DataFrameResult<f64> {
109 let sum: f64 = self.data.iter().filter_map(|v| v.to_double().ok()).sum();
110 Ok(sum)
111 }
112
113 pub fn mean(&self) -> DataFrameResult<f64> {
115 let nums: Vec<f64> = self
116 .data
117 .iter()
118 .filter_map(|v| v.to_double().ok())
119 .collect();
120
121 if nums.is_empty() {
122 return Err(DataFrameError::InvalidOperation(
123 "Cannot compute mean of empty or non-numeric series".to_string(),
124 ));
125 }
126
127 Ok(nums.iter().sum::<f64>() / nums.len() as f64)
128 }
129
130 pub fn unique(&self) -> Vec<XdlValue> {
132 let mut unique_values = Vec::new();
133 let mut seen = std::collections::HashSet::new();
134
135 for value in &self.data {
136 let key = value.to_string_repr();
137 if seen.insert(key) {
138 unique_values.push(value.clone());
139 }
140 }
141
142 unique_values
143 }
144
145 pub fn count(&self) -> usize {
147 self.data.len()
148 }
149
150 pub fn value_counts(&self) -> HashMap<String, usize> {
152 let mut counts = HashMap::new();
153
154 for value in &self.data {
155 let key = value.to_string_repr();
156 *counts.entry(key).or_insert(0) += 1;
157 }
158
159 counts
160 }
161
162 pub fn map<F>(&self, f: F) -> DataFrameResult<Self>
164 where
165 F: Fn(&XdlValue) -> XdlValue,
166 {
167 let mapped_data: Vec<XdlValue> = self.data.iter().map(f).collect();
168 Self::from_vec(mapped_data)
169 }
170
171 pub fn filter<F>(&self, predicate: F) -> DataFrameResult<Self>
173 where
174 F: Fn(&XdlValue) -> bool,
175 {
176 let filtered_data: Vec<XdlValue> =
177 self.data.iter().filter(|v| predicate(v)).cloned().collect();
178 Self::from_vec(filtered_data)
179 }
180
181 pub fn data(&self) -> &[XdlValue] {
183 &self.data
184 }
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the three-element series of doubles `[1.0, 2.0, 3.0]` shared by
    /// the aggregation tests.
    fn one_two_three() -> Series {
        let values = vec![
            XdlValue::Double(1.0),
            XdlValue::Double(2.0),
            XdlValue::Double(3.0),
        ];
        Series::from_vec(values).unwrap()
    }

    /// A series built from three values reports a length of three.
    #[test]
    fn test_series_creation() {
        let values = vec![XdlValue::Long(1), XdlValue::Long(2), XdlValue::Long(3)];
        let length = Series::from_vec(values).unwrap().len();
        assert_eq!(length, 3);
    }

    /// Looking up index 0 yields the first value that was supplied.
    #[test]
    fn test_series_get() {
        let series = Series::from_vec(vec![XdlValue::Long(1), XdlValue::Long(2)]).unwrap();
        let first = series.get(0);
        assert!(matches!(first, Ok(XdlValue::Long(1))));
    }

    /// Summing 1.0, 2.0 and 3.0 gives 6.0.
    #[test]
    fn test_series_sum() {
        let total = one_two_three().sum().unwrap();
        assert_eq!(total, 6.0);
    }

    /// The mean of 1.0, 2.0 and 3.0 is 2.0.
    #[test]
    fn test_series_mean() {
        let average = one_two_three().mean().unwrap();
        assert_eq!(average, 2.0);
    }
}