1use super::types::{AggOp, CsvFile, CsvRecord, CsvSchema, CsvValidationReport, TrajectoryFrame};
6
7pub fn write_timeseries(
9 times: &[f64],
10 values: &[f64],
11 header_time: &str,
12 header_val: &str,
13) -> String {
14 let mut csv = CsvFile::new(vec![header_time.to_string(), header_val.to_string()]);
15 for (&t, &v) in times.iter().zip(values.iter()) {
16 csv.add_record_f64(&[t, v]);
17 }
18 csv.to_string()
19}
20pub fn parse_column_f64(csv: &CsvFile, name: &str) -> Result<Vec<f64>, String> {
22 let idx = csv
23 .get_column_by_name(name)
24 .ok_or_else(|| format!("Column '{}' not found", name))?;
25 csv.get_column_f64(idx)
26}
/// Guesses the field delimiter of delimited text.
///
/// Only the first ten lines are sampled. Each candidate (`,`, tab, `;`, `|`)
/// is scored as "number of sampled lines with the same field count as the
/// first line" multiplied by that field count. Candidates that do not split
/// the first line into at least two fields are skipped.
///
/// The highest score wins; on a tie the earlier candidate in the list above
/// is kept. A comma is returned when the input is empty or no candidate
/// qualifies.
#[allow(dead_code)]
pub fn detect_delimiter(s: &str) -> char {
    const CANDIDATES: [char; 4] = [',', '\t', ';', '|'];

    let sample: Vec<&str> = s.lines().take(10).collect();
    let header = match sample.first() {
        Some(line) => *line,
        None => return ',',
    };

    // (delimiter, score); only a strictly better score replaces the current
    // winner, so ties resolve in favour of the earlier candidate.
    let mut winner = (',', 0usize);
    for delim in CANDIDATES.iter().copied() {
        let width = header.split(delim).count();
        if width < 2 {
            continue;
        }
        let agreeing = sample
            .iter()
            .filter(|line| line.split(delim).count() == width)
            .count();
        let score = agreeing * width;
        if score > winner.1 {
            winner = (delim, score);
        }
    }
    winner.0
}
58#[allow(dead_code)]
60pub fn parse_auto(s: &str) -> Result<CsvFile, String> {
61 let delim = detect_delimiter(s);
62 CsvFile::from_str_with_delimiter(s, delim)
63}
64#[allow(dead_code)]
67pub fn read_chunked(s: &str, chunk_size: usize) -> Vec<CsvFile> {
68 let full = match CsvFile::from_str(s) {
69 Ok(f) => f,
70 Err(_) => return Vec::new(),
71 };
72 if chunk_size == 0 {
73 return vec![full];
74 }
75 let mut chunks = Vec::new();
76 let mut start = 0;
77 while start < full.records.len() {
78 let end = (start + chunk_size).min(full.records.len());
79 let mut chunk = CsvFile::new(full.headers.clone());
80 for i in start..end {
81 chunk.records.push(CsvRecord {
82 fields: full.records[i].fields.clone(),
83 });
84 }
85 chunks.push(chunk);
86 start = end;
87 }
88 chunks
89}
/// Normalizes a header name into a lowercase identifier-like string.
///
/// Surrounding whitespace is trimmed, the text is lowercased, and every
/// character that is neither alphanumeric nor `_` is replaced by `_`.
/// Runs of replaced characters are not collapsed.
#[allow(dead_code)]
pub fn normalize_header(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        let keep = ch == '_' || ch.is_alphanumeric();
        normalized.push(if keep { ch } else { '_' });
    }
    normalized
}
105#[allow(dead_code)]
110pub fn aggregate_column(csv: &CsvFile, col: usize, op: AggOp) -> Option<f64> {
111 let vals = csv.get_column_f64(col).ok()?;
112 if vals.is_empty() {
113 return None;
114 }
115 let n = vals.len() as f64;
116 Some(match op {
117 AggOp::Sum => vals.iter().sum(),
118 AggOp::Mean => vals.iter().sum::<f64>() / n,
119 AggOp::Min => vals.iter().cloned().fold(f64::INFINITY, f64::min),
120 AggOp::Max => vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max),
121 AggOp::Std => {
122 let mean = vals.iter().sum::<f64>() / n;
123 let var = vals.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / n;
124 var.sqrt()
125 }
126 AggOp::Count => vals.len() as f64,
127 })
128}
129#[allow(dead_code)]
131pub fn validate_csv(csv: &CsvFile, schema: &CsvSchema) -> CsvValidationReport {
132 CsvValidationReport {
133 errors: schema.validate(csv),
134 }
135}
// Unit tests for the CSV types pulled in from `crate::csv_io::types` and for
// the free functions defined earlier in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv_io::types::*;

    // ---- CsvFile: construction and record insertion ----

    #[test]
    fn test_new_empty() {
        let csv = CsvFile::new(vec!["x".into(), "y".into()]);
        assert_eq!(csv.column_count(), 2);
        assert_eq!(csv.record_count(), 0);
    }
    #[test]
    fn test_add_record_string() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record(vec!["1".into(), "2".into()]);
        assert_eq!(csv.record_count(), 1);
        assert_eq!(csv.records[0].fields[0], "1");
    }
    #[test]
    fn test_add_record_f64() {
        let mut csv = CsvFile::new(vec!["t".into(), "v".into()]);
        csv.add_record_f64(&[0.0, 9.81]);
        assert_eq!(csv.record_count(), 1);
        assert_eq!(csv.records[0].fields[1], "9.81");
    }

    // ---- CsvFile: column access ----

    #[test]
    fn test_get_column_f64_valid() {
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record_f64(&[1.5]);
        csv.add_record_f64(&[3.0]);
        let col = csv.get_column_f64(0).unwrap();
        assert_eq!(col, vec![1.5, 3.0]);
    }
    #[test]
    fn test_get_column_f64_out_of_range() {
        let csv = CsvFile::new(vec!["x".into()]);
        assert!(csv.get_column_f64(5).is_err());
    }
    #[test]
    fn test_get_column_f64_parse_error() {
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record(vec!["not_a_number".into()]);
        assert!(csv.get_column_f64(0).is_err());
    }
    #[test]
    fn test_get_column_by_name_found() {
        let csv = CsvFile::new(vec!["time".into(), "energy".into()]);
        assert_eq!(csv.get_column_by_name("energy"), Some(1));
    }
    #[test]
    fn test_get_column_by_name_missing() {
        let csv = CsvFile::new(vec!["time".into()]);
        assert!(csv.get_column_by_name("missing").is_none());
    }

    // ---- CsvFile: serialization and parsing ----

    #[test]
    fn test_to_string_roundtrip() {
        let mut csv = CsvFile::new(vec!["t".into(), "x".into()]);
        csv.add_record_f64(&[0.0, 1.0]);
        csv.add_record_f64(&[1.0, 2.0]);
        let s = csv.to_string();
        let parsed = CsvFile::from_str(&s).unwrap();
        assert_eq!(parsed.headers, vec!["t", "x"]);
        assert_eq!(parsed.record_count(), 2);
    }
    #[test]
    fn test_from_str_with_spaces() {
        let s = "time , value\n0.0 , 1.0\n1.0 , 2.0\n";
        let csv = CsvFile::from_str(s).unwrap();
        assert_eq!(csv.headers[0], "time");
        assert_eq!(csv.record_count(), 2);
    }
    #[test]
    fn test_from_str_empty_lines_ignored() {
        let s = "a,b\n1,2\n\n3,4\n";
        let csv = CsvFile::from_str(s).unwrap();
        assert_eq!(csv.record_count(), 2);
    }
    #[test]
    fn test_from_str_empty_input() {
        assert!(CsvFile::from_str("").is_err());
    }

    // ---- CsvFile::filter_rows ----

    #[test]
    fn test_filter_rows_positive() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[-1.0]);
        csv.add_record_f64(&[2.0]);
        csv.add_record_f64(&[3.0]);
        let filtered = csv.filter_rows(0, |v| v > 0.0);
        assert_eq!(filtered.record_count(), 2);
    }
    #[test]
    fn test_filter_rows_none_match() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[1.0]);
        let filtered = csv.filter_rows(0, |v| v > 100.0);
        assert_eq!(filtered.record_count(), 0);
    }
    #[test]
    fn test_filter_rows_preserves_headers() {
        let mut csv = CsvFile::new(vec!["t".into(), "x".into()]);
        csv.add_record_f64(&[0.0, 1.0]);
        let filtered = csv.filter_rows(0, |_| true);
        assert_eq!(filtered.headers, vec!["t", "x"]);
    }

    // ---- write_timeseries / parse_column_f64 ----

    #[test]
    fn test_write_timeseries_format() {
        let s = write_timeseries(&[0.0, 1.0], &[10.0, 20.0], "time", "pos");
        assert!(s.starts_with("time,pos\n"));
        // NOTE(review): this disjunction is satisfied by any output containing
        // "10", so it does not pin the exact number formatting.
        assert!(s.contains("0,10") || s.contains("0.0") || s.contains("10"));
    }
    #[test]
    fn test_parse_column_f64_by_name() {
        let mut csv = CsvFile::new(vec!["t".into(), "e".into()]);
        csv.add_record_f64(&[0.0, 5.0]);
        csv.add_record_f64(&[1.0, 6.0]);
        let col = parse_column_f64(&csv, "e").unwrap();
        assert_eq!(col, vec![5.0, 6.0]);
    }
    #[test]
    fn test_parse_column_f64_missing_name() {
        let csv = CsvFile::new(vec!["t".into()]);
        assert!(parse_column_f64(&csv, "nope").is_err());
    }
    #[test]
    fn test_multiple_columns_round_trip() {
        let mut csv = CsvFile::new(vec!["x".into(), "y".into(), "z".into()]);
        csv.add_record_f64(&[1.0, 2.0, 3.0]);
        let s = csv.to_string();
        let parsed = CsvFile::from_str(&s).unwrap();
        let x = parsed.get_column_f64(0).unwrap();
        let y = parsed.get_column_f64(1).unwrap();
        let z = parsed.get_column_f64(2).unwrap();
        assert!((x[0] - 1.0).abs() < 1e-12);
        assert!((y[0] - 2.0).abs() < 1e-12);
        assert!((z[0] - 3.0).abs() < 1e-12);
    }

    // ---- CsvFile::infer_column_type ----

    #[test]
    fn test_infer_column_type_integer() {
        let mut csv = CsvFile::new(vec!["a".into()]);
        csv.add_record(vec!["1".into()]);
        csv.add_record(vec!["2".into()]);
        csv.add_record(vec!["-10".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Integer);
    }
    #[test]
    fn test_infer_column_type_float() {
        let mut csv = CsvFile::new(vec!["a".into()]);
        csv.add_record(vec!["1.5".into()]);
        csv.add_record(vec!["2.7".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Float);
    }
    #[test]
    fn test_infer_column_type_text() {
        let mut csv = CsvFile::new(vec!["a".into()]);
        csv.add_record(vec!["hello".into()]);
        csv.add_record(vec!["world".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Text);
    }
    #[test]
    fn test_infer_column_type_mixed_int_float() {
        let mut csv = CsvFile::new(vec!["a".into()]);
        csv.add_record(vec!["1".into()]);
        csv.add_record(vec!["2.5".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Float);
    }
    #[test]
    fn test_infer_column_type_empty() {
        let csv = CsvFile::new(vec!["a".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Text);
    }
    #[test]
    fn test_infer_column_type_out_of_range() {
        let csv = CsvFile::new(vec!["a".into()]);
        assert_eq!(csv.infer_column_type(99), ColumnType::Text);
    }

    // ---- CsvFile: column selection and header normalization ----

    #[test]
    fn test_select_columns_by_index() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into(), "c".into()]);
        csv.add_record(vec!["1".into(), "2".into(), "3".into()]);
        let subset = csv.select_columns(&[0, 2]);
        assert_eq!(subset.headers, vec!["a", "c"]);
        assert_eq!(subset.records[0].fields, vec!["1", "3"]);
    }
    #[test]
    fn test_select_columns_by_name() {
        let mut csv = CsvFile::new(vec!["time".into(), "x".into(), "y".into()]);
        csv.add_record(vec!["0".into(), "1.0".into(), "2.0".into()]);
        let subset = csv.select_columns_by_name(&["y", "time"]);
        assert_eq!(subset.headers, vec!["y", "time"]);
        assert_eq!(subset.records[0].fields, vec!["2.0", "0"]);
    }
    #[test]
    fn test_select_columns_missing_name_ignored() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record(vec!["1".into(), "2".into()]);
        let subset = csv.select_columns_by_name(&["a", "missing"]);
        assert_eq!(subset.headers, vec!["a"]);
    }
    #[test]
    fn test_normalize_headers() {
        let mut csv = CsvFile::new(vec![
            " Time Step ".into(),
            "X Position".into(),
            "energy (J)".into(),
        ]);
        csv.normalize_headers();
        assert_eq!(csv.headers[0], "time_step");
        assert_eq!(csv.headers[1], "x_position");
        assert_eq!(csv.headers[2], "energy__j_");
    }

    // ---- CsvFile: column statistics ----

    #[test]
    fn test_column_stats_basic() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[1.0]);
        csv.add_record_f64(&[3.0]);
        csv.add_record_f64(&[5.0]);
        let stats = csv.column_stats(0).unwrap();
        assert!((stats.min - 1.0).abs() < 1e-12);
        assert!((stats.max - 5.0).abs() < 1e-12);
        assert!((stats.mean - 3.0).abs() < 1e-12);
        assert_eq!(stats.count, 3);
        assert!((stats.sum - 9.0).abs() < 1e-12);
    }
    #[test]
    fn test_column_stats_single_value() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[42.0]);
        let stats = csv.column_stats(0).unwrap();
        assert!((stats.min - 42.0).abs() < 1e-12);
        assert!((stats.max - 42.0).abs() < 1e-12);
        assert!((stats.mean - 42.0).abs() < 1e-12);
    }
    #[test]
    fn test_column_stats_negative_values() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[-10.0]);
        csv.add_record_f64(&[-5.0]);
        csv.add_record_f64(&[0.0]);
        let stats = csv.column_stats(0).unwrap();
        assert!((stats.min - (-10.0)).abs() < 1e-12);
        assert!((stats.max - 0.0).abs() < 1e-12);
        assert!((stats.mean - (-5.0)).abs() < 1e-12);
    }
    #[test]
    fn test_column_stats_text_column_returns_none() {
        let mut csv = CsvFile::new(vec!["name".into()]);
        csv.add_record(vec!["alice".into()]);
        assert!(csv.column_stats(0).is_none());
    }
    #[test]
    fn test_all_column_stats() {
        let mut csv = CsvFile::new(vec!["x".into(), "label".into(), "y".into()]);
        csv.add_record(vec!["1.0".into(), "a".into(), "10.0".into()]);
        csv.add_record(vec!["2.0".into(), "b".into(), "20.0".into()]);
        let stats = csv.all_column_stats();
        // Only the two numeric columns are expected; "label" is skipped.
        assert_eq!(stats.len(), 2);
        assert_eq!(stats[0].0, "x");
        assert_eq!(stats[1].0, "y");
    }

    // ---- detect_delimiter / parse_auto ----

    #[test]
    fn test_detect_delimiter_comma() {
        let s = "a,b,c\n1,2,3\n4,5,6\n";
        assert_eq!(detect_delimiter(s), ',');
    }
    #[test]
    fn test_detect_delimiter_tab() {
        let s = "a\tb\tc\n1\t2\t3\n4\t5\t6\n";
        assert_eq!(detect_delimiter(s), '\t');
    }
    #[test]
    fn test_detect_delimiter_semicolon() {
        let s = "a;b;c\n1;2;3\n4;5;6\n";
        assert_eq!(detect_delimiter(s), ';');
    }
    #[test]
    fn test_detect_delimiter_pipe() {
        let s = "a|b|c\n1|2|3\n";
        assert_eq!(detect_delimiter(s), '|');
    }
    #[test]
    fn test_parse_auto_tab() {
        let s = "time\tvalue\n0.0\t1.0\n1.0\t2.0\n";
        let csv = parse_auto(s).unwrap();
        assert_eq!(csv.headers, vec!["time", "value"]);
        assert_eq!(csv.record_count(), 2);
    }
    #[test]
    fn test_parse_auto_semicolon() {
        let s = "x;y\n1;2\n3;4\n";
        let csv = parse_auto(s).unwrap();
        assert_eq!(csv.headers, vec!["x", "y"]);
        let col = csv.get_column_f64(1).unwrap();
        assert_eq!(col, vec![2.0, 4.0]);
    }

    // ---- read_chunked ----

    #[test]
    fn test_read_chunked_basic() {
        let s = "x\n1\n2\n3\n4\n5\n";
        let chunks = read_chunked(s, 2);
        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].record_count(), 2);
        assert_eq!(chunks[1].record_count(), 2);
        assert_eq!(chunks[2].record_count(), 1);
    }
    #[test]
    fn test_read_chunked_exact_multiple() {
        let s = "x\n1\n2\n3\n4\n";
        let chunks = read_chunked(s, 2);
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].record_count(), 2);
        assert_eq!(chunks[1].record_count(), 2);
    }
    #[test]
    fn test_read_chunked_larger_than_data() {
        let s = "x\n1\n2\n";
        let chunks = read_chunked(s, 100);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].record_count(), 2);
    }
    #[test]
    fn test_read_chunked_preserves_headers() {
        let s = "a,b\n1,2\n3,4\n5,6\n";
        let chunks = read_chunked(s, 2);
        for chunk in &chunks {
            assert_eq!(chunk.headers, vec!["a", "b"]);
        }
    }

    // ---- CsvFile: custom delimiters ----

    #[test]
    fn test_to_string_with_delimiter() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record(vec!["1".into(), "2".into()]);
        let s = csv.to_string_with_delimiter(';');
        assert!(s.starts_with("a;b\n"));
        assert!(s.contains("1;2"));
    }
    #[test]
    fn test_from_str_with_delimiter() {
        let s = "x;y\n1;2\n3;4\n";
        let csv = CsvFile::from_str_with_delimiter(s, ';').unwrap();
        assert_eq!(csv.headers, vec!["x", "y"]);
        assert_eq!(csv.record_count(), 2);
    }

    // ---- CsvFile: typed column getters and sorting ----

    #[test]
    fn test_get_column_i64() {
        let mut csv = CsvFile::new(vec!["n".into()]);
        csv.add_record(vec!["42".into()]);
        csv.add_record(vec!["-7".into()]);
        let col = csv.get_column_i64(0).unwrap();
        assert_eq!(col, vec![42, -7]);
    }
    #[test]
    fn test_get_column_i64_parse_error() {
        let mut csv = CsvFile::new(vec!["n".into()]);
        csv.add_record(vec!["1.5".into()]);
        assert!(csv.get_column_i64(0).is_err());
    }
    #[test]
    fn test_get_column_strings() {
        let mut csv = CsvFile::new(vec!["name".into()]);
        csv.add_record(vec!["alice".into()]);
        csv.add_record(vec!["bob".into()]);
        let col = csv.get_column_strings(0).unwrap();
        assert_eq!(col, vec!["alice", "bob"]);
    }
    #[test]
    fn test_sort_by_column() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[3.0]);
        csv.add_record_f64(&[1.0]);
        csv.add_record_f64(&[2.0]);
        csv.sort_by_column(0);
        let col = csv.get_column_f64(0).unwrap();
        assert_eq!(col, vec![1.0, 2.0, 3.0]);
    }
    #[test]
    fn test_sort_by_column_already_sorted() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[1.0]);
        csv.add_record_f64(&[2.0]);
        csv.sort_by_column(0);
        let col = csv.get_column_f64(0).unwrap();
        assert_eq!(col, vec![1.0, 2.0]);
    }

    // ---- normalize_header and assorted edge cases ----

    #[test]
    fn test_normalize_header_fn() {
        assert_eq!(normalize_header(" Time Step "), "time_step");
        assert_eq!(normalize_header("X(m/s)"), "x_m_s_");
        assert_eq!(normalize_header("abc_def"), "abc_def");
    }
    #[test]
    fn test_delimiter_roundtrip_semicolon() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record_f64(&[1.0, 2.0]);
        let s = csv.to_string_with_delimiter(';');
        let parsed = CsvFile::from_str_with_delimiter(&s, ';').unwrap();
        assert_eq!(parsed.headers, vec!["a", "b"]);
        let col = parsed.get_column_f64(0).unwrap();
        assert!((col[0] - 1.0).abs() < 1e-12);
    }
    #[test]
    fn test_detect_delimiter_empty() {
        assert_eq!(detect_delimiter(""), ',');
    }
    #[test]
    fn test_chunked_empty_input() {
        let chunks = read_chunked("", 5);
        assert!(chunks.is_empty());
    }
    #[test]
    fn test_select_columns_empty_indices() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record(vec!["1".into(), "2".into()]);
        let subset = csv.select_columns(&[]);
        assert!(subset.headers.is_empty());
        assert_eq!(subset.records[0].fields.len(), 0);
    }

    // ---- CsvSchema::validate ----

    #[test]
    fn schema_validate_ok() {
        let schema = CsvSchema::new(vec![
            ("x".into(), ColumnType::Float),
            ("label".into(), ColumnType::Text),
        ]);
        let mut csv = CsvFile::new(vec!["x".into(), "label".into()]);
        csv.add_record(vec!["3.14".into(), "hello".into()]);
        let errors = schema.validate(&csv);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
    }
    #[test]
    fn schema_validate_type_mismatch() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Integer)]);
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record(vec!["not_an_int".into()]);
        let errors = schema.validate(&csv);
        assert!(!errors.is_empty(), "should report type error");
    }
    #[test]
    fn schema_validate_column_count_mismatch() {
        let schema = CsvSchema::new(vec![
            ("a".into(), ColumnType::Float),
            ("b".into(), ColumnType::Float),
        ]);
        let csv = CsvFile::new(vec!["a".into()]);
        let errors = schema.validate(&csv);
        assert!(!errors.is_empty());
    }
    #[test]
    fn schema_validate_name_mismatch() {
        let schema = CsvSchema::new(vec![("expected".into(), ColumnType::Text)]);
        let csv = CsvFile::new(vec!["actual".into()]);
        let errors = schema.validate(&csv);
        assert!(!errors.is_empty());
    }

    // ---- TimeSeriesCsv ----

    #[test]
    fn time_series_times_extracted() {
        let input = "time,temp\n0.0,300.0\n1.0,301.0\n2.0,302.0\n";
        let ts = TimeSeriesCsv::from_str(input, "time").unwrap();
        let times = ts.times().unwrap();
        assert_eq!(times, vec![0.0, 1.0, 2.0]);
    }
    #[test]
    fn time_series_duration() {
        let input = "time,v\n1.0,0.0\n3.0,1.0\n5.0,2.0\n";
        let ts = TimeSeriesCsv::from_str(input, "time").unwrap();
        assert!((ts.duration() - 4.0).abs() < 1e-10);
    }
    #[test]
    fn time_series_n_steps() {
        let input = "time,v\n0.0,1.0\n0.5,2.0\n";
        let ts = TimeSeriesCsv::from_str(input, "time").unwrap();
        assert_eq!(ts.n_steps(), 2);
    }
    #[test]
    fn time_series_missing_column() {
        let input = "x,y\n1.0,2.0\n";
        // Construction succeeds even though the time column is absent; the
        // absence only surfaces through `times()`.
        let ts = TimeSeriesCsv::from_str(input, "time").unwrap();
        assert!(ts.times().is_none());
    }

    // ---- aggregate_column ----

    // Shared fixture: a single column "v" holding 1.0, 2.0, 3.0, 4.0.
    fn sample_csv() -> CsvFile {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[1.0]);
        csv.add_record_f64(&[2.0]);
        csv.add_record_f64(&[3.0]);
        csv.add_record_f64(&[4.0]);
        csv
    }
    #[test]
    fn aggregate_sum() {
        let csv = sample_csv();
        let s = aggregate_column(&csv, 0, AggOp::Sum).unwrap();
        assert!((s - 10.0).abs() < 1e-10);
    }
    #[test]
    fn aggregate_mean() {
        let csv = sample_csv();
        let m = aggregate_column(&csv, 0, AggOp::Mean).unwrap();
        assert!((m - 2.5).abs() < 1e-10);
    }
    #[test]
    fn aggregate_min_max() {
        let csv = sample_csv();
        assert!((aggregate_column(&csv, 0, AggOp::Min).unwrap() - 1.0).abs() < 1e-10);
        assert!((aggregate_column(&csv, 0, AggOp::Max).unwrap() - 4.0).abs() < 1e-10);
    }
    #[test]
    fn aggregate_count() {
        let csv = sample_csv();
        assert!((aggregate_column(&csv, 0, AggOp::Count).unwrap() - 4.0).abs() < 1e-10);
    }
    #[test]
    fn aggregate_std() {
        let csv = sample_csv();
        let std = aggregate_column(&csv, 0, AggOp::Std).unwrap();
        // Population variance of 1..=4 is 1.25.
        assert!((std - 1.25f64.sqrt()).abs() < 1e-9);
    }
    #[test]
    fn aggregate_out_of_range() {
        let csv = sample_csv();
        assert!(aggregate_column(&csv, 99, AggOp::Sum).is_none());
    }

    // ---- CsvWriter ----

    #[test]
    fn csv_writer_basic() {
        let mut w = CsvWriter::new(vec!["x".into(), "y".into()], ',');
        w.write_row(&["1", "2"]);
        w.write_row(&["3", "4"]);
        let s = w.finish();
        assert!(s.starts_with("x,y\n"));
        assert!(s.contains("1,2"));
    }
    #[test]
    fn csv_writer_f64() {
        let mut w = CsvWriter::new(vec!["val".into()], ',');
        w.write_row_f64(&[2.54321]);
        let s = w.finish();
        assert!(s.contains("2.543210"));
    }
    #[test]
    fn csv_writer_row_count() {
        let mut w = CsvWriter::new(vec!["a".into()], ',');
        for _ in 0..5 {
            w.write_row(&["x"]);
        }
        assert_eq!(w.row_count(), 5);
    }
    #[test]
    fn csv_writer_semicolon_delimiter() {
        let mut w = CsvWriter::new(vec!["a".into(), "b".into()], ';');
        w.write_row(&["1", "2"]);
        let s = w.finish();
        assert!(s.contains("a;b"));
        assert!(s.contains("1;2"));
    }

    // ---- validate_csv / CsvValidationReport ----

    #[test]
    fn validation_report_valid() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Float)]);
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record_f64(&[1.0]);
        let report = validate_csv(&csv, &schema);
        assert!(report.is_valid());
        assert_eq!(report.error_count(), 0);
    }
    #[test]
    fn validation_report_invalid() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Integer)]);
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record(vec!["hello".into()]);
        let report = validate_csv(&csv, &schema);
        assert!(!report.is_valid());
        assert!(report.error_count() > 0);
    }

    // ---- LazyCsvIter ----

    #[test]
    fn lazy_iter_yields_rows() {
        let input = "a,b,c\n1,2,3\n4,5,6\n";
        let mut iter = LazyCsvIter::new(input, ',');
        assert_eq!(iter.headers, vec!["a", "b", "c"]);
        let r1 = iter.next().unwrap();
        assert_eq!(r1, vec!["1", "2", "3"]);
        let r2 = iter.next().unwrap();
        assert_eq!(r2, vec!["4", "5", "6"]);
        assert!(iter.next().is_none());
    }
    #[test]
    fn lazy_iter_empty_input() {
        let mut iter = LazyCsvIter::new("", ',');
        assert!(iter.headers.is_empty());
        assert!(iter.next().is_none());
    }
    #[test]
    fn lazy_iter_header_only() {
        let mut iter = LazyCsvIter::new("x,y\n", ',');
        assert_eq!(iter.headers, vec!["x", "y"]);
        assert!(iter.next().is_none());
    }
    #[test]
    fn lazy_iter_semicolon_delimiter() {
        let input = "a;b\n10;20\n";
        let mut iter = LazyCsvIter::new(input, ';');
        assert_eq!(iter.headers, vec!["a", "b"]);
        let row = iter.next().unwrap();
        assert_eq!(row, vec!["10", "20"]);
    }
}
735#[allow(dead_code)]
746pub fn write_trajectory_csv(frames: &[TrajectoryFrame]) -> String {
747 let mut out = String::new();
748 for frame in frames {
749 if !frame.title.is_empty() {
750 out.push_str(&format!("# {}\n", frame.title));
751 }
752 for pos in &frame.positions {
753 out.push_str(&format!("{},{},{}\n", pos[0], pos[1], pos[2]));
754 }
755 out.push('\n');
756 }
757 out
758}
759#[allow(dead_code)]
766pub fn read_trajectory_csv(s: &str) -> std::result::Result<Vec<TrajectoryFrame>, String> {
767 let mut frames: Vec<TrajectoryFrame> = Vec::new();
768 let mut current = TrajectoryFrame::new();
769 let mut in_frame = false;
770 for (line_no, raw_line) in s.lines().enumerate() {
771 let line = raw_line.trim();
772 if line.is_empty() {
773 if in_frame {
774 frames.push(std::mem::take(&mut current));
775 in_frame = false;
776 }
777 continue;
778 }
779 if line.starts_with('#') {
780 current.title = line.trim_start_matches('#').trim().to_string();
781 in_frame = true;
782 continue;
783 }
784 let parts: Vec<&str> = line.split(',').collect();
785 if parts.len() != 3 {
786 return Err(format!(
787 "line {}: expected 3 comma-separated values, got {}",
788 line_no + 1,
789 parts.len()
790 ));
791 }
792 let x: f64 = parts[0]
793 .trim()
794 .parse()
795 .map_err(|e| format!("line {}: x parse error: {}", line_no + 1, e))?;
796 let y: f64 = parts[1]
797 .trim()
798 .parse()
799 .map_err(|e| format!("line {}: y parse error: {}", line_no + 1, e))?;
800 let z: f64 = parts[2]
801 .trim()
802 .parse()
803 .map_err(|e| format!("line {}: z parse error: {}", line_no + 1, e))?;
804 current.positions.push([x, y, z]);
805 in_frame = true;
806 }
807 if in_frame {
808 frames.push(current);
809 }
810 Ok(frames)
811}
// Unit tests for `CsvDataFrame` / `CsvColumnData` (from
// `crate::csv_io::types`) and for the trajectory reader and writer defined in
// this file.
#[cfg(test)]
mod tests_dataframe {
    use super::*;
    use crate::csv_io::types::*;

    // ---- CsvDataFrame ----

    #[test]
    fn dataframe_from_csv_types() {
        let csv_str = "id,x,label\n1,3.14,hello\n2,2.71,world\n";
        let df = CsvDataFrame::from_str(csv_str).unwrap();
        assert_eq!(df.n_cols(), 3);
        assert_eq!(df.n_rows(), 2);
        assert_eq!(df.column(0).unwrap().column_type(), ColumnType::Integer);
        assert_eq!(df.column(1).unwrap().column_type(), ColumnType::Float);
        assert_eq!(df.column(2).unwrap().column_type(), ColumnType::Text);
    }
    #[test]
    fn dataframe_float_column_by_name() {
        let csv_str = "t,v\n0.0,1.5\n1.0,2.5\n";
        let df = CsvDataFrame::from_str(csv_str).unwrap();
        let v = df.float_column("v").unwrap();
        assert_eq!(v.len(), 2);
        assert!((v[0] - 1.5).abs() < 1e-12);
        assert!((v[1] - 2.5).abs() < 1e-12);
    }
    #[test]
    fn dataframe_integer_column_by_name() {
        let csv_str = "n,label\n10,a\n20,b\n";
        let df = CsvDataFrame::from_str(csv_str).unwrap();
        let n = df.integer_column("n").unwrap();
        assert_eq!(n, &vec![10_i64, 20_i64]);
    }
    #[test]
    fn dataframe_text_column_by_name() {
        let csv_str = "name,val\nalice,1.0\nbob,2.0\n";
        let df = CsvDataFrame::from_str(csv_str).unwrap();
        let names = df.text_column("name").unwrap();
        assert_eq!(names, &vec!["alice".to_string(), "bob".to_string()]);
    }
    #[test]
    fn dataframe_column_index_missing() {
        let csv_str = "a,b\n1,2\n";
        let df = CsvDataFrame::from_str(csv_str).unwrap();
        assert!(df.column_index("nope").is_none());
    }
    #[test]
    fn dataframe_to_csv_string_roundtrip() {
        let csv_str = "x,y\n1.5,2.5\n3.5,4.5\n";
        let df = CsvDataFrame::from_str(csv_str).unwrap();
        let out = df.to_csv_string();
        assert!(out.contains("x,y"));
        let df2 = CsvDataFrame::from_str(&out).unwrap();
        let x = df2.float_column("x").unwrap();
        assert!((x[0] - 1.5).abs() < 1e-12);
        assert!((x[1] - 3.5).abs() < 1e-12);
    }
    #[test]
    fn dataframe_empty_input() {
        assert!(CsvDataFrame::from_str("").is_err());
    }
    #[test]
    fn dataframe_n_rows_n_cols() {
        let csv_str = "a,b,c\n1,2,3\n4,5,6\n7,8,9\n";
        let df = CsvDataFrame::from_str(csv_str).unwrap();
        assert_eq!(df.n_rows(), 3);
        assert_eq!(df.n_cols(), 3);
    }
    #[test]
    fn dataframe_column_by_name_returns_none_for_missing() {
        let csv_str = "x\n1.0\n";
        let df = CsvDataFrame::from_str(csv_str).unwrap();
        assert!(df.column_by_name("missing").is_none());
    }
    #[test]
    fn dataframe_with_delimiter() {
        let csv_str = "x;y\n1.0;2.0\n3.0;4.0\n";
        let df = CsvDataFrame::from_str_with_delimiter(csv_str, ';').unwrap();
        assert_eq!(df.n_cols(), 2);
        let y = df.float_column("y").unwrap();
        assert!((y[0] - 2.0).abs() < 1e-12);
    }

    // ---- write_trajectory_csv / read_trajectory_csv ----

    #[test]
    fn trajectory_write_read_roundtrip() {
        let frames = vec![
            TrajectoryFrame {
                title: "frame 0".to_string(),
                positions: vec![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
            },
            TrajectoryFrame {
                title: "frame 1".to_string(),
                positions: vec![[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
            },
        ];
        let csv = write_trajectory_csv(&frames);
        let parsed = read_trajectory_csv(&csv).unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].title, "frame 0");
        assert_eq!(parsed[0].n_atoms(), 2);
        assert!((parsed[0].positions[0][0] - 1.0).abs() < 1e-12);
        assert!((parsed[1].positions[1][2] - 0.6).abs() < 1e-12);
    }
    #[test]
    fn trajectory_single_frame_no_title() {
        let csv = "1.0,2.0,3.0\n4.0,5.0,6.0\n";
        let frames = read_trajectory_csv(csv).unwrap();
        assert_eq!(frames.len(), 1);
        assert_eq!(frames[0].n_atoms(), 2);
        assert!(frames[0].title.is_empty());
    }
    #[test]
    fn trajectory_empty_input() {
        let frames = read_trajectory_csv("").unwrap();
        assert!(frames.is_empty());
    }
    #[test]
    fn trajectory_error_on_bad_value() {
        let csv = "1.0,not_a_float,3.0\n";
        assert!(read_trajectory_csv(csv).is_err());
    }
    #[test]
    fn trajectory_error_on_wrong_column_count() {
        let csv = "1.0,2.0\n";
        assert!(read_trajectory_csv(csv).is_err());
    }
    #[test]
    fn trajectory_multiple_frames_position_accuracy() {
        // NOTE(review): despite the name, this exercises a single untitled
        // frame; it checks that coordinates survive a write/read round trip.
        let frames = vec![TrajectoryFrame {
            title: String::new(),
            positions: vec![[0.123456789, -0.987654321, 1.111111111]],
        }];
        let csv = write_trajectory_csv(&frames);
        let parsed = read_trajectory_csv(&csv).unwrap();
        assert!((parsed[0].positions[0][0] - 0.123456789).abs() < 1e-9);
        assert!((parsed[0].positions[0][1] - (-0.987654321)).abs() < 1e-9);
        assert!((parsed[0].positions[0][2] - 1.111111111).abs() < 1e-9);
    }
    #[test]
    fn trajectory_frame_n_atoms() {
        let f = TrajectoryFrame {
            title: "t".into(),
            positions: vec![[0.0; 3]; 5],
        };
        assert_eq!(f.n_atoms(), 5);
    }

    // ---- CsvColumnData ----

    #[test]
    fn csv_column_data_len_and_type() {
        let col = CsvColumnData::Float(vec![1.0, 2.0, 3.0]);
        assert_eq!(col.len(), 3);
        assert!(!col.is_empty());
        assert_eq!(col.column_type(), ColumnType::Float);
        let int_col = CsvColumnData::Integer(vec![10, 20]);
        assert_eq!(int_col.len(), 2);
        assert_eq!(int_col.column_type(), ColumnType::Integer);
        let text_col = CsvColumnData::Text(vec!["a".into()]);
        assert_eq!(text_col.column_type(), ColumnType::Text);
        assert!(!text_col.is_empty());
    }
}
968#[allow(dead_code)]
973pub fn merge_csv_files(base: &CsvFile, other: &CsvFile) -> Result<CsvFile, String> {
974 if base.headers != other.headers {
975 return Err(format!(
976 "header mismatch: {:?} vs {:?}",
977 base.headers, other.headers
978 ));
979 }
980 let mut result = CsvFile::new(base.headers.clone());
981 for rec in &base.records {
982 result.records.push(CsvRecord {
983 fields: rec.fields.clone(),
984 });
985 }
986 for rec in &other.records {
987 result.records.push(CsvRecord {
988 fields: rec.fields.clone(),
989 });
990 }
991 Ok(result)
992}
993#[allow(dead_code)]
998pub fn transpose_csv(csv: &CsvFile) -> CsvFile {
999 let n_rows = csv.records.len();
1000 let n_cols = csv.headers.len();
1001 if n_rows == 0 || n_cols == 0 {
1002 return CsvFile::new(vec![]);
1003 }
1004 let new_headers: Vec<String> = (0..n_rows).map(|i| format!("col_{}", i)).collect();
1005 let mut result = CsvFile::new(new_headers);
1006 for col in 0..n_cols {
1007 let fields: Vec<String> = (0..n_rows)
1008 .map(|row| {
1009 csv.records[row]
1010 .fields
1011 .get(col)
1012 .cloned()
1013 .unwrap_or_default()
1014 })
1015 .collect();
1016 result.records.push(CsvRecord { fields });
1017 }
1018 result
1019}
1020#[allow(dead_code)]
1026pub fn inner_join_csv(left: &CsvFile, right: &CsvFile, key: &str) -> Result<CsvFile, String> {
1027 let left_key_idx = left
1028 .get_column_by_name(key)
1029 .ok_or_else(|| format!("key '{}' not in left file", key))?;
1030 let right_key_idx = right
1031 .get_column_by_name(key)
1032 .ok_or_else(|| format!("key '{}' not in right file", key))?;
1033 let mut headers = left.headers.clone();
1034 for (i, h) in right.headers.iter().enumerate() {
1035 if i != right_key_idx {
1036 headers.push(h.clone());
1037 }
1038 }
1039 let mut result = CsvFile::new(headers);
1040 for l_rec in &left.records {
1041 let l_key = l_rec.fields.get(left_key_idx).cloned().unwrap_or_default();
1042 for r_rec in &right.records {
1043 let r_key = r_rec.fields.get(right_key_idx).cloned().unwrap_or_default();
1044 if l_key == r_key {
1045 let mut fields = l_rec.fields.clone();
1046 for (i, f) in r_rec.fields.iter().enumerate() {
1047 if i != right_key_idx {
1048 fields.push(f.clone());
1049 }
1050 }
1051 result.records.push(CsvRecord { fields });
1052 }
1053 }
1054 }
1055 Ok(result)
1056}
1057#[allow(dead_code)]
1062pub fn diff_csv(a: &CsvFile, b: &CsvFile) -> Result<CsvFile, String> {
1063 if a.headers != b.headers {
1064 return Err("header mismatch".to_string());
1065 }
1066 if a.records.len() != b.records.len() {
1067 return Err(format!(
1068 "row count mismatch: {} vs {}",
1069 a.records.len(),
1070 b.records.len()
1071 ));
1072 }
1073 let mut result = CsvFile::new(a.headers.clone());
1074 for (i, (ar, br)) in a.records.iter().zip(b.records.iter()).enumerate() {
1075 let mut fields = Vec::with_capacity(a.headers.len());
1076 for (j, (af, bf)) in ar.fields.iter().zip(br.fields.iter()).enumerate() {
1077 let av: f64 = af
1078 .parse()
1079 .map_err(|_| format!("row {} col {}: not numeric", i, j))?;
1080 let bv: f64 = bf
1081 .parse()
1082 .map_err(|_| format!("row {} col {}: not numeric", i, j))?;
1083 fields.push(format!("{}", av - bv));
1084 }
1085 result.records.push(CsvRecord { fields });
1086 }
1087 Ok(result)
1088}
/// Heuristically decides whether the CSV text `s` starts with a header row.
///
/// The first non-blank line is split on `delim`; the text is considered to
/// have a header when at least one trimmed, non-empty field on that line does
/// not parse as `f64`. Leading blank lines are skipped so that they do not
/// hide a header that follows them. Returns `false` when there is no
/// non-blank line.
///
/// Note: tokens such as `nan` and `inf` parse as `f64` in Rust and therefore
/// count as numeric here.
#[allow(dead_code)]
pub fn has_header(s: &str, delim: char) -> bool {
    s.lines()
        .find(|line| !line.trim().is_empty())
        .map_or(false, |first| {
            first.split(delim).any(|field| {
                let cell = field.trim();
                !cell.is_empty() && cell.parse::<f64>().is_err()
            })
        })
}
/// Generates placeholder header names `col_0`, `col_1`, … for header-less CSV
/// text.
///
/// The number of names equals the number of `delim`-separated fields on the
/// first non-blank line of `s`. Leading blank lines are skipped so that they
/// do not collapse the result to a single column. Returns an empty vector
/// when `s` contains no non-blank line.
#[allow(dead_code)]
pub fn infer_headers(s: &str, delim: char) -> Vec<String> {
    let n = s
        .lines()
        .find(|line| !line.trim().is_empty())
        .map_or(0, |line| line.split(delim).count());
    (0..n).map(|i| format!("col_{}", i)).collect()
}
1114#[allow(dead_code)]
1120pub fn parse_smart(s: &str) -> Result<CsvFile, String> {
1121 let delim = detect_delimiter(s);
1122 if has_header(s, delim) {
1123 CsvFile::from_str_with_delimiter(s, delim)
1124 } else {
1125 let headers = infer_headers(s, delim);
1126 let mut result = CsvFile::new(headers);
1127 for line in s.lines() {
1128 if line.trim().is_empty() {
1129 continue;
1130 }
1131 let fields: Vec<String> = line.split(delim).map(str::trim).map(String::from).collect();
1132 result.add_record(fields);
1133 }
1134 Ok(result)
1135 }
1136}
1137#[allow(dead_code)]
1143pub fn is_boolean_column(csv: &CsvFile, col_idx: usize) -> bool {
1144 let truthy = ["true", "1", "yes", "on"];
1145 let falsy = ["false", "0", "no", "off"];
1146 if col_idx >= csv.headers.len() {
1147 return false;
1148 }
1149 for rec in &csv.records {
1150 if let Some(v) = rec.fields.get(col_idx) {
1151 let lower = v.trim().to_lowercase();
1152 if !truthy.contains(&lower.as_str()) && !falsy.contains(&lower.as_str()) {
1153 return false;
1154 }
1155 }
1156 }
1157 true
1158}
1159#[allow(dead_code)]
1163pub fn get_column_bool(csv: &CsvFile, col_idx: usize) -> Result<Vec<bool>, String> {
1164 let truthy = ["true", "1", "yes", "on"];
1165 let falsy = ["false", "0", "no", "off"];
1166 if col_idx >= csv.headers.len() {
1167 return Err(format!("column index {} out of range", col_idx));
1168 }
1169 let mut out = Vec::with_capacity(csv.records.len());
1170 for (row, rec) in csv.records.iter().enumerate() {
1171 let raw = rec
1172 .fields
1173 .get(col_idx)
1174 .ok_or_else(|| format!("row {} has no field at col {}", row, col_idx))?;
1175 let lower = raw.trim().to_lowercase();
1176 if truthy.contains(&lower.as_str()) {
1177 out.push(true);
1178 } else if falsy.contains(&lower.as_str()) {
1179 out.push(false);
1180 } else {
1181 return Err(format!("row {}: '{}' is not a boolean value", row, raw));
1182 }
1183 }
1184 Ok(out)
1185}
1186#[allow(dead_code)]
1190pub fn sample_every_nth(csv: &CsvFile, stride: usize) -> CsvFile {
1191 if stride == 0 {
1192 return CsvFile::new(csv.headers.clone());
1193 }
1194 let mut result = CsvFile::new(csv.headers.clone());
1195 for (i, rec) in csv.records.iter().enumerate() {
1196 if i % stride == 0 {
1197 result.records.push(CsvRecord {
1198 fields: rec.fields.clone(),
1199 });
1200 }
1201 }
1202 result
1203}
1204#[allow(dead_code)]
1206pub fn dedup_by_column(csv: &CsvFile, col_idx: usize) -> CsvFile {
1207 use std::collections::HashSet;
1208 let mut seen: HashSet<String> = HashSet::new();
1209 let mut result = CsvFile::new(csv.headers.clone());
1210 for rec in &csv.records {
1211 let key = rec.fields.get(col_idx).cloned().unwrap_or_default();
1212 if seen.insert(key) {
1213 result.records.push(CsvRecord {
1214 fields: rec.fields.clone(),
1215 });
1216 }
1217 }
1218 result
1219}
#[cfg(test)]
mod tests_csv_new {
    use super::*;
    use crate::csv_io::types::*;

    // ---- StreamingCsvReader ------------------------------------------------

    #[test]
    fn streaming_reader_headers() {
        let text = "time,x,y\n0.0,1.0,2.0\n1.0,3.0,4.0\n";
        let reader = StreamingCsvReader::new(text, ',');
        assert_eq!(reader.headers, vec!["time", "x", "y"]);
        assert_eq!(reader.n_cols(), 3);
    }

    #[test]
    fn streaming_reader_next_row() {
        let text = "a,b\n1,2\n3,4\n";
        let mut reader = StreamingCsvReader::new(text, ',');
        let first = reader.next_row().unwrap();
        assert_eq!(first, vec!["1", "2"]);
        assert_eq!(reader.current_row(), 1);
        let second = reader.next_row().unwrap();
        assert_eq!(second, vec!["3", "4"]);
    }

    #[test]
    fn streaming_reader_eof() {
        let text = "a\n1\n";
        let mut reader = StreamingCsvReader::new(text, ',');
        // Consume the only data row; the next read must report end of input.
        reader.next_row();
        assert!(reader.next_row().is_none());
    }

    #[test]
    fn streaming_reader_skips_blank_lines() {
        let text = "a\n1\n\n2\n";
        let mut reader = StreamingCsvReader::new(text, ',');
        reader.next_row();
        let after_blank = reader.next_row().unwrap();
        assert_eq!(after_blank, vec!["2"]);
    }

    #[test]
    fn streaming_reader_collect_all() {
        let text = "x,y\n1,2\n3,4\n5,6\n";
        let reader = StreamingCsvReader::new(text, ',');
        let collected = reader.collect_all();
        assert_eq!(collected.record_count(), 3);
        assert_eq!(collected.headers, vec!["x", "y"]);
    }

    #[test]
    fn streaming_reader_auto_delimiter() {
        let text = "a\tb\tc\n1\t2\t3\n";
        let reader = StreamingCsvReader::auto(text);
        assert_eq!(reader.delimiter, '\t');
        assert_eq!(reader.headers, vec!["a", "b", "c"]);
    }

    // ---- merge_csv_files ---------------------------------------------------

    #[test]
    fn merge_csv_files_basic() {
        let mut first = CsvFile::new(vec!["x".into()]);
        first.add_record_f64(&[1.0]);
        let mut second = CsvFile::new(vec!["x".into()]);
        second.add_record_f64(&[2.0]);
        second.add_record_f64(&[3.0]);
        let merged = merge_csv_files(&first, &second).unwrap();
        assert_eq!(merged.record_count(), 3);
    }

    #[test]
    fn merge_csv_files_header_mismatch() {
        let first = CsvFile::new(vec!["x".into()]);
        let second = CsvFile::new(vec!["y".into()]);
        assert!(merge_csv_files(&first, &second).is_err());
    }

    // ---- transpose_csv -----------------------------------------------------

    #[test]
    fn transpose_basic() {
        let mut table = CsvFile::new(vec!["a".into(), "b".into()]);
        table.add_record(vec!["1".into(), "2".into()]);
        table.add_record(vec!["3".into(), "4".into()]);
        let flipped = transpose_csv(&table);
        assert_eq!(flipped.headers.len(), 2);
        assert_eq!(flipped.record_count(), 2);
        assert_eq!(flipped.records[0].fields, vec!["1", "3"]);
        assert_eq!(flipped.records[1].fields, vec!["2", "4"]);
    }

    #[test]
    fn transpose_empty() {
        let table = CsvFile::new(vec![]);
        let flipped = transpose_csv(&table);
        assert!(flipped.headers.is_empty());
    }

    // ---- inner_join_csv ----------------------------------------------------

    #[test]
    fn inner_join_basic() {
        let mut people = CsvFile::new(vec!["id".into(), "name".into()]);
        people.add_record(vec!["1".into(), "alice".into()]);
        people.add_record(vec!["2".into(), "bob".into()]);
        let mut scores = CsvFile::new(vec!["id".into(), "score".into()]);
        scores.add_record(vec!["1".into(), "90".into()]);
        scores.add_record(vec!["3".into(), "80".into()]);
        let joined = inner_join_csv(&people, &scores, "id").unwrap();
        assert_eq!(joined.record_count(), 1);
        assert_eq!(joined.headers, vec!["id", "name", "score"]);
        assert_eq!(joined.records[0].fields[1], "alice");
        assert_eq!(joined.records[0].fields[2], "90");
    }

    #[test]
    fn inner_join_missing_key() {
        let lhs = CsvFile::new(vec!["a".into()]);
        let rhs = CsvFile::new(vec!["b".into()]);
        assert!(inner_join_csv(&lhs, &rhs, "id").is_err());
    }

    // ---- diff_csv ----------------------------------------------------------

    #[test]
    fn diff_csv_basic() {
        let mut minuend = CsvFile::new(vec!["v".into()]);
        minuend.add_record(vec!["5.0".into()]);
        minuend.add_record(vec!["3.0".into()]);
        let mut subtrahend = CsvFile::new(vec!["v".into()]);
        subtrahend.add_record(vec!["1.0".into()]);
        subtrahend.add_record(vec!["1.0".into()]);
        let delta = diff_csv(&minuend, &subtrahend).unwrap();
        let values = delta.get_column_f64(0).unwrap();
        assert!((values[0] - 4.0).abs() < 1e-12);
        assert!((values[1] - 2.0).abs() < 1e-12);
    }

    #[test]
    fn diff_csv_row_count_mismatch() {
        let mut one_row = CsvFile::new(vec!["v".into()]);
        one_row.add_record(vec!["1.0".into()]);
        let no_rows = CsvFile::new(vec!["v".into()]);
        assert!(diff_csv(&one_row, &no_rows).is_err());
    }

    // ---- header detection and smart parsing --------------------------------

    #[test]
    fn has_header_true() {
        assert!(has_header("time,x,y\n0,1,2\n", ','));
    }

    #[test]
    fn has_header_false_all_numbers() {
        assert!(!has_header("0,1,2\n3,4,5\n", ','));
    }

    #[test]
    fn infer_headers_count() {
        let names = infer_headers("1,2,3,4\n", ',');
        assert_eq!(names, vec!["col_0", "col_1", "col_2", "col_3"]);
    }

    #[test]
    fn parse_smart_with_header() {
        let text = "a,b\n1,2\n3,4\n";
        let parsed = parse_smart(text).unwrap();
        assert_eq!(parsed.headers, vec!["a", "b"]);
        assert_eq!(parsed.record_count(), 2);
    }

    #[test]
    fn parse_smart_without_header() {
        let text = "1,2\n3,4\n";
        let parsed = parse_smart(text).unwrap();
        assert_eq!(parsed.headers, vec!["col_0", "col_1"]);
        assert_eq!(parsed.record_count(), 2);
    }

    // ---- boolean columns ---------------------------------------------------

    #[test]
    fn boolean_column_detection() {
        let mut flags = CsvFile::new(vec!["flag".into()]);
        flags.add_record(vec!["true".into()]);
        flags.add_record(vec!["false".into()]);
        flags.add_record(vec!["yes".into()]);
        assert!(is_boolean_column(&flags, 0));
    }

    #[test]
    fn boolean_column_rejection() {
        let mut mixed = CsvFile::new(vec!["v".into()]);
        mixed.add_record(vec!["true".into()]);
        mixed.add_record(vec!["maybe".into()]);
        assert!(!is_boolean_column(&mixed, 0));
    }

    #[test]
    fn get_column_bool_values() {
        let mut flags = CsvFile::new(vec!["b".into()]);
        flags.add_record(vec!["1".into()]);
        flags.add_record(vec!["0".into()]);
        flags.add_record(vec!["yes".into()]);
        flags.add_record(vec!["no".into()]);
        let parsed = get_column_bool(&flags, 0).unwrap();
        assert_eq!(parsed, vec![true, false, true, false]);
    }

    #[test]
    fn get_column_bool_error_on_bad() {
        let mut flags = CsvFile::new(vec!["b".into()]);
        flags.add_record(vec!["maybe".into()]);
        assert!(get_column_bool(&flags, 0).is_err());
    }

    // ---- sampling and de-duplication ---------------------------------------

    #[test]
    fn sample_every_nth_basic() {
        let mut table = CsvFile::new(vec!["v".into()]);
        for n in 0..10_usize {
            table.add_record(vec![n.to_string()]);
        }
        // Indices 0, 3, 6 and 9 are kept.
        let picked = sample_every_nth(&table, 3);
        assert_eq!(picked.record_count(), 4);
    }

    #[test]
    fn sample_every_nth_stride_one() {
        let mut table = CsvFile::new(vec!["v".into()]);
        table.add_record_f64(&[1.0]);
        table.add_record_f64(&[2.0]);
        let picked = sample_every_nth(&table, 1);
        assert_eq!(picked.record_count(), 2);
    }

    #[test]
    fn sample_every_nth_zero_stride() {
        let mut table = CsvFile::new(vec!["v".into()]);
        table.add_record_f64(&[1.0]);
        let picked = sample_every_nth(&table, 0);
        assert_eq!(picked.record_count(), 0);
    }

    #[test]
    fn dedup_by_column_basic() {
        let mut table = CsvFile::new(vec!["id".into(), "val".into()]);
        table.add_record(vec!["1".into(), "a".into()]);
        table.add_record(vec!["2".into(), "b".into()]);
        table.add_record(vec!["1".into(), "c".into()]);
        let unique = dedup_by_column(&table, 0);
        assert_eq!(unique.record_count(), 2);
        // The first occurrence of id "1" is the one that survives.
        assert_eq!(unique.records[0].fields[1], "a");
    }

    #[test]
    fn dedup_by_column_all_unique() {
        let mut table = CsvFile::new(vec!["id".into()]);
        for n in 0..5_usize {
            table.add_record(vec![n.to_string()]);
        }
        let unique = dedup_by_column(&table, 0);
        assert_eq!(unique.record_count(), 5);
    }

    // ---- LazyCsvIter -------------------------------------------------------

    #[test]
    fn lazy_iter_yields_correct_rows() {
        let text = "x,y\n1,2\n3,4\n5,6\n";
        let mut rows = LazyCsvIter::new(text, ',');
        assert_eq!(rows.headers, vec!["x", "y"]);
        let first = rows.next().unwrap();
        assert_eq!(first, vec!["1", "2"]);
        let second = rows.next().unwrap();
        assert_eq!(second, vec!["3", "4"]);
        let third = rows.next().unwrap();
        assert_eq!(third, vec!["5", "6"]);
        assert!(rows.next().is_none());
    }

    #[test]
    fn lazy_iter_tab_delimiter() {
        let text = "a\tb\n10\t20\n";
        let mut rows = LazyCsvIter::new(text, '\t');
        assert_eq!(rows.headers, vec!["a", "b"]);
        let only = rows.next().unwrap();
        assert_eq!(only[0], "10");
        assert_eq!(only[1], "20");
    }

    // ---- schema validation -------------------------------------------------

    #[test]
    fn validation_report_is_valid() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Float)]);
        let mut table = CsvFile::new(vec!["x".into()]);
        table.add_record(vec!["3.14".into()]);
        let outcome = validate_csv(&table, &schema);
        assert!(outcome.is_valid());
        assert_eq!(outcome.error_count(), 0);
    }

    #[test]
    fn validation_report_has_errors() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Integer)]);
        let mut table = CsvFile::new(vec!["x".into()]);
        table.add_record(vec!["not_int".into()]);
        let outcome = validate_csv(&table, &schema);
        assert!(!outcome.is_valid());
        assert!(outcome.error_count() > 0);
    }

    // ---- normalize_header --------------------------------------------------

    #[test]
    fn normalize_header_special_chars() {
        assert_eq!(normalize_header("E (J/mol)"), "e__j_mol_");
    }

    #[test]
    fn normalize_header_already_clean() {
        assert_eq!(normalize_header("velocity_x"), "velocity_x");
    }
}
1493}