virtual_frame/
dataframe.rs1use crate::column::Column;
7use std::fmt;
8
/// Errors describing why a set of columns cannot form a `DataFrame`.
///
/// `PartialEq`/`Eq` are derived so callers and tests can compare errors by
/// value (for example with `assert_eq!`) rather than only checking `is_err()`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DataError {
    /// A column's row count differs from the row count it was required to have.
    ColumnLengthMismatch {
        /// Row count the column was required to have.
        expected: usize,
        /// Row count the offending column actually has.
        got: usize,
        /// Name of the offending column.
        column: String,
    },
    /// The same column name was supplied more than once; carries that name.
    DuplicateColumn(String),
    /// No columns were provided.
    ///
    /// NOTE(review): nothing in this file constructs this variant; confirm
    /// which callers are expected to produce it.
    Empty,
}
23
24impl fmt::Display for DataError {
25 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
26 match self {
27 DataError::ColumnLengthMismatch {
28 expected,
29 got,
30 column,
31 } => write!(
32 f,
33 "column `{}` has {} rows, expected {}",
34 column, got, expected
35 ),
36 DataError::DuplicateColumn(name) => write!(f, "duplicate column `{}`", name),
37 DataError::Empty => write!(f, "no columns provided"),
38 }
39 }
40}
41
// Empty impl: `DataError` supplies the required `Debug` (derived) and
// `Display` (implemented in this file) and overrides none of the `Error`
// trait's provided methods.
impl std::error::Error for DataError {}
43
/// A table stored as an ordered list of named columns.
///
/// `DataFrame::from_columns` rejects duplicate names and columns of unequal
/// length, but `columns` is a public field, so code that builds or mutates it
/// directly bypasses those checks.
#[derive(Debug, Clone)]
pub struct DataFrame {
    /// `(name, column)` pairs, kept in the order they were supplied.
    pub columns: Vec<(String, Column)>,
}
52
53impl DataFrame {
54 pub fn new() -> Self {
56 Self {
57 columns: Vec::new(),
58 }
59 }
60
61 pub fn from_columns(columns: Vec<(String, Column)>) -> Result<Self, DataError> {
65 if columns.is_empty() {
66 return Ok(Self {
67 columns: Vec::new(),
68 });
69 }
70 let mut names = std::collections::BTreeSet::new();
72 for (name, _) in &columns {
73 if !names.insert(name.as_str()) {
74 return Err(DataError::DuplicateColumn(name.clone()));
75 }
76 }
77 let len = columns[0].1.len();
79 for (name, col) in &columns {
80 if col.len() != len {
81 return Err(DataError::ColumnLengthMismatch {
82 expected: len,
83 got: col.len(),
84 column: name.clone(),
85 });
86 }
87 }
88 Ok(Self { columns })
89 }
90
91 pub fn nrows(&self) -> usize {
93 self.columns.first().map(|(_, c)| c.len()).unwrap_or(0)
94 }
95
96 pub fn ncols(&self) -> usize {
98 self.columns.len()
99 }
100
101 pub fn get_column(&self, name: &str) -> Option<&Column> {
103 self.columns
104 .iter()
105 .find(|(n, _)| n == name)
106 .map(|(_, c)| c)
107 }
108
109 pub fn column_index(&self, name: &str) -> Option<usize> {
111 self.columns.iter().position(|(n, _)| n == name)
112 }
113
114 pub fn column_names(&self) -> Vec<&str> {
116 self.columns.iter().map(|(n, _)| n.as_str()).collect()
117 }
118}
119
120impl Default for DataFrame {
121 fn default() -> Self {
122 Self::new()
123 }
124}
125
#[cfg(test)]
mod tests {
    use super::*;

    /// Equally long, uniquely named columns build a frame that reports the
    /// right shape and keeps the names in input order.
    #[test]
    fn test_from_columns() {
        let df = DataFrame::from_columns(vec![
            ("id".into(), Column::Int(vec![1, 2, 3])),
            ("name".into(), Column::Str(vec!["a".into(), "b".into(), "c".into()])),
        ])
        .unwrap();
        assert_eq!(df.nrows(), 3);
        assert_eq!(df.ncols(), 2);
        assert_eq!(df.column_names(), vec!["id", "name"]);
    }

    /// An empty input is accepted and produces an empty frame.
    #[test]
    fn test_from_columns_empty() {
        let df = DataFrame::from_columns(Vec::new()).unwrap();
        assert_eq!(df.nrows(), 0);
        assert_eq!(df.ncols(), 0);
        assert!(df.column_names().is_empty());
    }

    /// The mismatch error names the offending column and both row counts,
    /// not merely "some error".
    #[test]
    fn test_length_mismatch() {
        let result = DataFrame::from_columns(vec![
            ("a".into(), Column::Int(vec![1, 2])),
            ("b".into(), Column::Int(vec![1, 2, 3])),
        ]);
        match result {
            Err(DataError::ColumnLengthMismatch {
                expected,
                got,
                column,
            }) => {
                assert_eq!(expected, 2);
                assert_eq!(got, 3);
                assert_eq!(column, "b");
            }
            other => panic!("expected ColumnLengthMismatch, got {:?}", other),
        }
    }

    /// The duplicate error carries the repeated name.
    #[test]
    fn test_duplicate_column() {
        let result = DataFrame::from_columns(vec![
            ("a".into(), Column::Int(vec![1])),
            ("a".into(), Column::Int(vec![2])),
        ]);
        match result {
            Err(DataError::DuplicateColumn(name)) => assert_eq!(name, "a"),
            other => panic!("expected DuplicateColumn, got {:?}", other),
        }
    }

    /// Lookups by name and by index agree for present and absent columns.
    #[test]
    fn test_get_column() {
        let df = DataFrame::from_columns(vec![
            ("x".into(), Column::Float(vec![1.0, 2.0])),
        ])
        .unwrap();
        assert!(df.get_column("x").is_some());
        assert!(df.get_column("y").is_none());
        assert_eq!(df.column_index("x"), Some(0));
        assert_eq!(df.column_index("y"), None);
    }

    /// Pins the user-facing text of every error variant.
    #[test]
    fn test_error_display() {
        let mismatch = DataError::ColumnLengthMismatch {
            expected: 2,
            got: 3,
            column: "b".into(),
        };
        assert_eq!(mismatch.to_string(), "column `b` has 3 rows, expected 2");
        assert_eq!(
            DataError::DuplicateColumn("a".into()).to_string(),
            "duplicate column `a`"
        );
        assert_eq!(DataError::Empty.to_string(), "no columns provided");
    }
}