1use crate::value::{ColType, Value};
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeSet;
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct Column {
15 pub name: String,
16 pub ty: ColType,
17 pub cells: Vec<Value>,
18}
19
20impl Column {
21 pub fn new(name: impl Into<String>, cells: Vec<Value>) -> Self {
24 let ty = cells
25 .iter()
26 .fold(ColType::Unknown, |acc, v| acc.unify(v.col_type()));
27 Column {
28 name: name.into(),
29 ty,
30 cells,
31 }
32 }
33
34 pub fn numeric(&self) -> Vec<f64> {
37 self.cells
38 .iter()
39 .filter_map(Value::as_f64)
40 .filter(|x| x.is_finite())
41 .collect()
42 }
43
44 pub fn null_count(&self) -> usize {
46 self.cells.iter().filter(|v| v.is_null()).count()
47 }
48
49 pub fn len(&self) -> usize {
50 self.cells.len()
51 }
52
53 pub fn is_empty(&self) -> bool {
54 self.cells.is_empty()
55 }
56}
57
58#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct RecordSet {
62 pub source: String,
64 pub format: String,
66 pub columns: Vec<Column>,
67}
68
69impl RecordSet {
70 pub fn new(source: impl Into<String>, format: impl Into<String>, columns: Vec<Column>) -> Self {
74 debug_assert!(
75 columns.windows(2).all(|w| w[0].len() == w[1].len()),
76 "RecordSet columns must be equal length"
77 );
78 RecordSet {
79 source: source.into(),
80 format: format.into(),
81 columns,
82 }
83 }
84
85 pub fn rows(&self) -> usize {
87 self.columns.first().map_or(0, Column::len)
88 }
89
90 pub fn width(&self) -> usize {
91 self.columns.len()
92 }
93
94 pub fn column(&self, name: &str) -> Option<&Column> {
95 self.columns.iter().find(|c| c.name == name)
96 }
97
98 pub fn select(&self, names: &[String]) -> RecordSet {
107 let keep: BTreeSet<&str> = names.iter().map(String::as_str).collect();
108 self.retain(|name| keep.contains(name))
109 }
110
111 pub fn without(&self, names: &[String]) -> RecordSet {
115 let drop: BTreeSet<&str> = names.iter().map(String::as_str).collect();
116 self.retain(|name| !drop.contains(name))
117 }
118
119 fn retain(&self, keep: impl Fn(&str) -> bool) -> RecordSet {
121 RecordSet {
122 source: self.source.clone(),
123 format: self.format.clone(),
124 columns: self
125 .columns
126 .iter()
127 .filter(|c| keep(&c.name))
128 .cloned()
129 .collect(),
130 }
131 }
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Column names of `rs`, in column order.
    fn names_of(rs: &RecordSet) -> Vec<&str> {
        rs.columns.iter().map(|c| c.name.as_str()).collect()
    }

    /// Owned copies of `names`, in the form `select` / `without` accept.
    fn owned(names: &[&str]) -> Vec<String> {
        names.iter().map(|&n| String::from(n)).collect()
    }

    /// Three single-row integer columns named `a`, `b`, `c`.
    fn abc() -> RecordSet {
        let columns = vec![
            Column::new("a", vec![Value::Int(1)]),
            Column::new("b", vec![Value::Int(2)]),
            Column::new("c", vec![Value::Int(3)]),
        ];
        RecordSet::new("src.csv", "csv", columns)
    }

    #[test]
    fn numeric_skips_nulls_and_strings() {
        let cells = vec![
            Value::Int(1),
            Value::Null,
            Value::Str("nope".into()),
            Value::Float(2.5),
        ];
        let col = Column::new("x", cells);
        assert_eq!(col.numeric(), vec![1.0, 2.5]);
        assert_eq!(col.ty, ColType::Mixed);
        assert_eq!(col.null_count(), 1);
    }

    #[test]
    fn null_count_is_exact() {
        let no_nulls = Column::new("a", vec![Value::Int(1), Value::Int(2)]);
        assert_eq!(no_nulls.null_count(), 0);

        let two_nulls = Column::new("b", vec![Value::Null, Value::Int(1), Value::Null]);
        assert_eq!(two_nulls.null_count(), 2);
    }

    #[test]
    fn empty_and_nonempty_columns() {
        let empty = Column::new("e", vec![]);
        assert!(empty.is_empty());

        let one = Column::new("f", vec![Value::Int(1)]);
        assert!(!one.is_empty());
    }

    #[test]
    fn rows_and_width() {
        let columns = vec![
            Column::new("a", vec![Value::Int(1), Value::Int(2)]),
            Column::new("b", vec![Value::Int(3), Value::Int(4)]),
        ];
        let rs = RecordSet::new("-", "csv", columns);
        assert_eq!(rs.rows(), 2);
        assert_eq!(rs.width(), 2);
        assert!(rs.column("a").is_some());
        assert!(rs.column("z").is_none());
    }

    #[test]
    fn select_keeps_named_columns_in_original_order() {
        // Requested as c, a; the result must follow the record set's own order.
        let rs = abc().select(&owned(&["c", "a"]));
        assert_eq!(names_of(&rs), ["a", "c"]);
        assert_eq!(rs.source, "src.csv");
        assert_eq!(rs.format, "csv");
        assert_eq!(rs.rows(), 1);
    }

    #[test]
    fn select_skips_unknown_names() {
        let rs = abc().select(&owned(&["a", "nope"]));
        assert_eq!(names_of(&rs), ["a"]);
    }

    #[test]
    fn select_empty_yields_no_columns() {
        let rs = abc().select(&[]);
        assert_eq!(rs.width(), 0);
    }

    #[test]
    fn without_drops_named_columns_and_keeps_the_rest() {
        let rs = abc().without(&owned(&["b"]));
        assert_eq!(names_of(&rs), ["a", "c"]);
        assert_eq!(rs.source, "src.csv");
        assert_eq!(rs.format, "csv");
    }

    #[test]
    fn without_empty_keeps_everything() {
        let rs = abc().without(&[]);
        assert_eq!(rs.width(), 3);
    }

    #[test]
    fn without_unknown_name_is_a_noop() {
        let rs = abc().without(&owned(&["zzz"]));
        assert_eq!(rs.width(), 3);
    }
}