nexcore_dataframe/
dataframe.rs1use crate::column::{Column, DataType};
4use crate::error::DataFrameError;
5use crate::scalar::Scalar;
6use crate::schema::Schema;
7
8#[derive(Debug, Clone)]
11pub struct DataFrame {
12 columns: Vec<Column>,
13}
14
15impl DataFrame {
16 pub fn new(columns: Vec<Column>) -> Result<Self, DataFrameError> {
19 if columns.is_empty() {
20 return Ok(Self { columns });
21 }
22 #[allow(
24 clippy::indexing_slicing,
25 reason = "columns is non-empty (checked by is_empty() guard above); index 0 and slice [1..] are always in bounds"
26 )]
27 let expected = columns[0].len();
28 #[allow(
29 clippy::indexing_slicing,
30 reason = "columns is non-empty; slice [1..] on a non-empty Vec is always valid (may produce empty slice)"
31 )]
32 for col in &columns[1..] {
33 if col.len() != expected {
34 return Err(DataFrameError::LengthMismatch {
35 expected,
36 actual: col.len(),
37 });
38 }
39 }
40 Ok(Self { columns })
41 }
42
43 #[must_use]
45 pub fn empty() -> Self {
46 Self {
47 columns: Vec::new(),
48 }
49 }
50
51 #[must_use]
53 pub fn height(&self) -> usize {
54 self.columns.first().map_or(0, |c| c.len())
55 }
56
57 #[must_use]
59 pub fn width(&self) -> usize {
60 self.columns.len()
61 }
62
63 #[must_use]
65 pub fn is_empty(&self) -> bool {
66 self.columns.is_empty()
67 }
68
69 pub fn column(&self, name: &str) -> Result<&Column, DataFrameError> {
71 self.columns
72 .iter()
73 .find(|c| c.name() == name)
74 .ok_or_else(|| DataFrameError::ColumnNotFound(name.to_string()))
75 }
76
77 pub fn column_names(&self) -> Vec<&str> {
79 self.columns.iter().map(|c| c.name()).collect()
80 }
81
82 #[must_use]
84 pub fn columns(&self) -> &[Column] {
85 &self.columns
86 }
87
88 #[must_use]
90 pub fn schema(&self) -> Schema {
91 Schema::new(
92 self.columns
93 .iter()
94 .map(|c| (c.name().to_string(), c.dtype()))
95 .collect(),
96 )
97 }
98
99 pub fn row(&self, index: usize) -> Option<Vec<Scalar>> {
101 if index >= self.height() {
102 return None;
103 }
104 Some(
105 self.columns
106 .iter()
107 .map(|c| c.get(index).unwrap_or(Scalar::Null))
108 .collect(),
109 )
110 }
111
112 pub fn with_column(&self, col: Column) -> Result<Self, DataFrameError> {
114 if !self.is_empty() && col.len() != self.height() {
115 return Err(DataFrameError::LengthMismatch {
116 expected: self.height(),
117 actual: col.len(),
118 });
119 }
120 let mut columns: Vec<Column> = self
121 .columns
122 .iter()
123 .filter(|c| c.name() != col.name())
124 .cloned()
125 .collect();
126 columns.push(col);
127 Ok(Self { columns })
128 }
129
130 pub(crate) fn from_columns_unchecked(columns: Vec<Column>) -> Self {
132 Self { columns }
133 }
134}
135
136impl std::fmt::Display for DataFrame {
137 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
138 let names: Vec<&str> = self.columns.iter().map(|c| c.name()).collect();
140 writeln!(f, "{}", names.join("\t"))?;
141 let max_rows = self.height().min(20);
143 for i in 0..max_rows {
144 let vals: Vec<String> = self
145 .columns
146 .iter()
147 .map(|c| c.get(i).map_or("null".to_string(), |s| s.to_string()))
148 .collect();
149 writeln!(f, "{}", vals.join("\t"))?;
150 }
151 if self.height() > max_rows {
152 #[allow(
154 clippy::arithmetic_side_effects,
155 reason = "max_rows = self.height().min(20) so max_rows <= self.height(); subtraction cannot underflow"
156 )]
157 writeln!(f, "... ({} more rows)", self.height() - max_rows)?;
158 }
159 Ok(())
160 }
161}
162
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a frame from fixture columns that are known to be valid, so a
    /// construction failure is treated as unreachable.
    fn frame(columns: Vec<Column>) -> DataFrame {
        DataFrame::new(columns).unwrap_or_else(|_| unreachable!())
    }

    /// Equal-length columns are accepted and reported with the right shape.
    #[test]
    fn new_valid() {
        let built = DataFrame::new(vec![
            Column::from_strs("name", &["a", "b"]),
            Column::from_i64s("val", vec![1, 2]),
        ]);
        assert!(built.is_ok());
        let shape = built.map(|df| (df.height(), df.width())).ok();
        assert_eq!(shape, Some((2, 2)));
    }

    /// Columns of differing lengths are rejected.
    #[test]
    fn new_length_mismatch() {
        let built = DataFrame::new(vec![
            Column::from_strs("a", &["x", "y"]),
            Column::from_i64s("b", vec![1]),
        ]);
        assert!(built.is_err());
    }

    /// The empty frame has no rows, no columns, and reports itself as empty.
    #[test]
    fn empty_dataframe() {
        let df = DataFrame::empty();
        assert_eq!(df.height(), 0);
        assert_eq!(df.width(), 0);
        assert!(df.is_empty());
    }

    /// Lookup succeeds for a present name and fails for an absent one.
    #[test]
    fn column_lookup() {
        let df = frame(vec![
            Column::from_strs("name", &["a"]),
            Column::from_i64s("val", vec![1]),
        ]);
        assert!(df.column("name").is_ok());
        assert!(df.column("missing").is_err());
    }

    /// The schema lists every column together with its data type.
    #[test]
    fn schema_extraction() {
        let df = frame(vec![
            Column::from_strs("s", &["x"]),
            Column::from_f64s("f", vec![1.0]),
        ]);
        let schema = df.schema();
        assert_eq!(schema.len(), 2);
        assert_eq!(schema.dtype("s"), Some(DataType::Utf8));
        assert_eq!(schema.dtype("f"), Some(DataType::Float64));
    }

    /// Adding a column under a new name widens the frame.
    #[test]
    fn with_column_add() {
        let base = frame(vec![Column::from_i64s("a", vec![1, 2])]);
        let widened = base.with_column(Column::from_i64s("b", vec![3, 4]));
        assert_eq!(widened.map(|df| df.width()).ok(), Some(2));
    }

    /// Adding a column under an existing name replaces the old values.
    #[test]
    fn with_column_replace() {
        let base = frame(vec![Column::from_i64s("a", vec![1, 2])]);
        let replaced = base
            .with_column(Column::from_i64s("a", vec![10, 20]))
            .unwrap_or_else(|_| unreachable!());
        assert_eq!(replaced.width(), 1);
        let first_value = replaced.column("a").ok().and_then(|c| c.get(0));
        assert_eq!(first_value, Some(Scalar::Int64(10)));
    }

    /// A column whose length differs from the frame's height is rejected.
    #[test]
    fn with_column_length_mismatch() {
        let base = frame(vec![Column::from_i64s("a", vec![1, 2])]);
        let outcome = base.with_column(Column::from_i64s("b", vec![1]));
        assert!(outcome.is_err());
    }

    /// In-range rows come back as one scalar per column; out-of-range is `None`.
    #[test]
    fn row_access() {
        let df = frame(vec![
            Column::from_strs("name", &["alice"]),
            Column::from_i64s("age", vec![30]),
        ]);
        let expected = vec![Scalar::String("alice".to_string()), Scalar::Int64(30)];
        assert_eq!(df.row(0), Some(expected));
        assert!(df.row(1).is_none());
    }
}