Skip to main content

oxiphysics_io/csv_io/
functions.rs

//! Auto-generated module
//!
//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use super::types::{AggOp, CsvFile, CsvRecord, CsvSchema, CsvValidationReport, TrajectoryFrame};
6
7/// Build a two-column time-series CSV string from parallel slices.
8pub fn write_timeseries(
9    times: &[f64],
10    values: &[f64],
11    header_time: &str,
12    header_val: &str,
13) -> String {
14    let mut csv = CsvFile::new(vec![header_time.to_string(), header_val.to_string()]);
15    for (&t, &v) in times.iter().zip(values.iter()) {
16        csv.add_record_f64(&[t, v]);
17    }
18    csv.to_string()
19}
20/// Extract a named column from a `CsvFile` as `f64`.
21pub fn parse_column_f64(csv: &CsvFile, name: &str) -> Result<Vec<f64>, String> {
22    let idx = csv
23        .get_column_by_name(name)
24        .ok_or_else(|| format!("Column '{}' not found", name))?;
25    csv.get_column_f64(idx)
26}
/// Auto-detect the delimiter used in a CSV string.
///
/// Tests comma, tab, semicolon, and pipe against the first ten non-blank
/// lines. For each candidate the score is
/// `(number of sampled lines with the same column count as the first line)
/// * (column count of the first line)`; candidates that do not split the
/// first line into at least two columns are discarded. The highest score
/// wins, and ties go to the earlier candidate in the order listed above.
///
/// Returns `','` when the input has no non-blank lines or when no candidate
/// splits the first line.
#[allow(dead_code)]
pub fn detect_delimiter(s: &str) -> char {
    const CANDIDATES: [char; 4] = [',', '\t', ';', '|'];
    const SAMPLE_LINES: usize = 10;

    // Blank lines carry no delimiter information. Skipping them keeps a
    // leading empty line from making every candidate look single-column.
    let lines: Vec<&str> = s
        .lines()
        .filter(|line| !line.trim().is_empty())
        .take(SAMPLE_LINES)
        .collect();
    if lines.is_empty() {
        return ',';
    }

    let mut best_delim = ',';
    let mut best_score: usize = 0;
    for &delim in &CANDIDATES {
        let first = lines[0].split(delim).count();
        if first < 2 {
            continue;
        }
        let consistent = lines
            .iter()
            .filter(|line| line.split(delim).count() == first)
            .count();
        let score = consistent * first;
        // Strict `>` keeps the earliest candidate on ties.
        if score > best_score {
            best_score = score;
            best_delim = delim;
        }
    }
    best_delim
}
58/// Parse a CSV string with auto-detected delimiter.
59#[allow(dead_code)]
60pub fn parse_auto(s: &str) -> Result<CsvFile, String> {
61    let delim = detect_delimiter(s);
62    CsvFile::from_str_with_delimiter(s, delim)
63}
64/// Read a CSV in chunks: returns an iterator of `CsvFile` objects, each
65/// containing at most `chunk_size` records.
66#[allow(dead_code)]
67pub fn read_chunked(s: &str, chunk_size: usize) -> Vec<CsvFile> {
68    let full = match CsvFile::from_str(s) {
69        Ok(f) => f,
70        Err(_) => return Vec::new(),
71    };
72    if chunk_size == 0 {
73        return vec![full];
74    }
75    let mut chunks = Vec::new();
76    let mut start = 0;
77    while start < full.records.len() {
78        let end = (start + chunk_size).min(full.records.len());
79        let mut chunk = CsvFile::new(full.headers.clone());
80        for i in start..end {
81            chunk.records.push(CsvRecord {
82                fields: full.records[i].fields.clone(),
83            });
84        }
85        chunks.push(chunk);
86        start = end;
87    }
88    chunks
89}
/// Normalize a header string: lowercase, replace non-alphanumeric with underscores.
///
/// Leading and trailing whitespace is trimmed first. Every remaining
/// character that is neither alphanumeric (per `char::is_alphanumeric`) nor
/// `_` is replaced by a single `_`. Runs of replaced characters are not
/// collapsed, so `"energy (J)"` becomes `"energy__j_"`.
#[allow(dead_code)]
pub fn normalize_header(s: &str) -> String {
    s.trim()
        .to_lowercase()
        .chars()
        .map(|c| if c.is_alphanumeric() || c == '_' { c } else { '_' })
        .collect()
}
105/// Aggregate a numeric column of a [`CsvFile`] using the given [`AggOp`].
106///
107/// Returns `None` if the column index is out of range or no numeric values
108/// are found.
109#[allow(dead_code)]
110pub fn aggregate_column(csv: &CsvFile, col: usize, op: AggOp) -> Option<f64> {
111    let vals = csv.get_column_f64(col).ok()?;
112    if vals.is_empty() {
113        return None;
114    }
115    let n = vals.len() as f64;
116    Some(match op {
117        AggOp::Sum => vals.iter().sum(),
118        AggOp::Mean => vals.iter().sum::<f64>() / n,
119        AggOp::Min => vals.iter().cloned().fold(f64::INFINITY, f64::min),
120        AggOp::Max => vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max),
121        AggOp::Std => {
122            let mean = vals.iter().sum::<f64>() / n;
123            let var = vals.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / n;
124            var.sqrt()
125        }
126        AggOp::Count => vals.len() as f64,
127    })
128}
129/// Validate a [`CsvFile`] against a [`CsvSchema`] and return a report.
130#[allow(dead_code)]
131pub fn validate_csv(csv: &CsvFile, schema: &CsvSchema) -> CsvValidationReport {
132    CsvValidationReport {
133        errors: schema.validate(csv),
134    }
135}
// Unit tests for the CSV helpers in this module and for the `CsvFile`-related
// types re-exported from `crate::csv_io::types`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv_io::types::*;

    // ----- CsvFile construction and record insertion -----
    #[test]
    fn test_new_empty() {
        let csv = CsvFile::new(vec!["x".into(), "y".into()]);
        assert_eq!(csv.column_count(), 2);
        assert_eq!(csv.record_count(), 0);
    }
    #[test]
    fn test_add_record_string() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record(vec!["1".into(), "2".into()]);
        assert_eq!(csv.record_count(), 1);
        assert_eq!(csv.records[0].fields[0], "1");
    }
    #[test]
    fn test_add_record_f64() {
        let mut csv = CsvFile::new(vec!["t".into(), "v".into()]);
        csv.add_record_f64(&[0.0, 9.81]);
        assert_eq!(csv.record_count(), 1);
        assert_eq!(csv.records[0].fields[1], "9.81");
    }

    // ----- Column access by index and by name -----
    #[test]
    fn test_get_column_f64_valid() {
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record_f64(&[1.5]);
        csv.add_record_f64(&[3.0]);
        let col = csv.get_column_f64(0).unwrap();
        assert_eq!(col, vec![1.5, 3.0]);
    }
    #[test]
    fn test_get_column_f64_out_of_range() {
        let csv = CsvFile::new(vec!["x".into()]);
        assert!(csv.get_column_f64(5).is_err());
    }
    #[test]
    fn test_get_column_f64_parse_error() {
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record(vec!["not_a_number".into()]);
        assert!(csv.get_column_f64(0).is_err());
    }
    #[test]
    fn test_get_column_by_name_found() {
        let csv = CsvFile::new(vec!["time".into(), "energy".into()]);
        assert_eq!(csv.get_column_by_name("energy"), Some(1));
    }
    #[test]
    fn test_get_column_by_name_missing() {
        let csv = CsvFile::new(vec!["time".into()]);
        assert!(csv.get_column_by_name("missing").is_none());
    }

    // ----- Serialization and parsing -----
    #[test]
    fn test_to_string_roundtrip() {
        let mut csv = CsvFile::new(vec!["t".into(), "x".into()]);
        csv.add_record_f64(&[0.0, 1.0]);
        csv.add_record_f64(&[1.0, 2.0]);
        let s = csv.to_string();
        let parsed = CsvFile::from_str(&s).unwrap();
        assert_eq!(parsed.headers, vec!["t", "x"]);
        assert_eq!(parsed.record_count(), 2);
    }
    #[test]
    fn test_from_str_with_spaces() {
        let s = "time , value\n0.0 , 1.0\n1.0 , 2.0\n";
        let csv = CsvFile::from_str(s).unwrap();
        assert_eq!(csv.headers[0], "time");
        assert_eq!(csv.record_count(), 2);
    }
    #[test]
    fn test_from_str_empty_lines_ignored() {
        let s = "a,b\n1,2\n\n3,4\n";
        let csv = CsvFile::from_str(s).unwrap();
        assert_eq!(csv.record_count(), 2);
    }
    #[test]
    fn test_from_str_empty_input() {
        assert!(CsvFile::from_str("").is_err());
    }

    // ----- Row filtering -----
    #[test]
    fn test_filter_rows_positive() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[-1.0]);
        csv.add_record_f64(&[2.0]);
        csv.add_record_f64(&[3.0]);
        let filtered = csv.filter_rows(0, |v| v > 0.0);
        assert_eq!(filtered.record_count(), 2);
    }
    #[test]
    fn test_filter_rows_none_match() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[1.0]);
        let filtered = csv.filter_rows(0, |v| v > 100.0);
        assert_eq!(filtered.record_count(), 0);
    }
    #[test]
    fn test_filter_rows_preserves_headers() {
        let mut csv = CsvFile::new(vec!["t".into(), "x".into()]);
        csv.add_record_f64(&[0.0, 1.0]);
        let filtered = csv.filter_rows(0, |_| true);
        assert_eq!(filtered.headers, vec!["t", "x"]);
    }

    // ----- Free functions: write_timeseries / parse_column_f64 -----
    #[test]
    fn test_write_timeseries_format() {
        let s = write_timeseries(&[0.0, 1.0], &[10.0, 20.0], "time", "pos");
        assert!(s.starts_with("time,pos\n"));
        assert!(s.contains("0,10") || s.contains("0.0") || s.contains("10"));
    }
    #[test]
    fn test_parse_column_f64_by_name() {
        let mut csv = CsvFile::new(vec!["t".into(), "e".into()]);
        csv.add_record_f64(&[0.0, 5.0]);
        csv.add_record_f64(&[1.0, 6.0]);
        let col = parse_column_f64(&csv, "e").unwrap();
        assert_eq!(col, vec![5.0, 6.0]);
    }
    #[test]
    fn test_parse_column_f64_missing_name() {
        let csv = CsvFile::new(vec!["t".into()]);
        assert!(parse_column_f64(&csv, "nope").is_err());
    }
    #[test]
    fn test_multiple_columns_round_trip() {
        let mut csv = CsvFile::new(vec!["x".into(), "y".into(), "z".into()]);
        csv.add_record_f64(&[1.0, 2.0, 3.0]);
        let s = csv.to_string();
        let parsed = CsvFile::from_str(&s).unwrap();
        let x = parsed.get_column_f64(0).unwrap();
        let y = parsed.get_column_f64(1).unwrap();
        let z = parsed.get_column_f64(2).unwrap();
        assert!((x[0] - 1.0).abs() < 1e-12);
        assert!((y[0] - 2.0).abs() < 1e-12);
        assert!((z[0] - 3.0).abs() < 1e-12);
    }

    // ----- Column type inference -----
    #[test]
    fn test_infer_column_type_integer() {
        let mut csv = CsvFile::new(vec!["a".into()]);
        csv.add_record(vec!["1".into()]);
        csv.add_record(vec!["2".into()]);
        csv.add_record(vec!["-10".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Integer);
    }
    #[test]
    fn test_infer_column_type_float() {
        let mut csv = CsvFile::new(vec!["a".into()]);
        csv.add_record(vec!["1.5".into()]);
        csv.add_record(vec!["2.7".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Float);
    }
    #[test]
    fn test_infer_column_type_text() {
        let mut csv = CsvFile::new(vec!["a".into()]);
        csv.add_record(vec!["hello".into()]);
        csv.add_record(vec!["world".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Text);
    }
    #[test]
    fn test_infer_column_type_mixed_int_float() {
        let mut csv = CsvFile::new(vec!["a".into()]);
        csv.add_record(vec!["1".into()]);
        csv.add_record(vec!["2.5".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Float);
    }
    #[test]
    fn test_infer_column_type_empty() {
        let csv = CsvFile::new(vec!["a".into()]);
        assert_eq!(csv.infer_column_type(0), ColumnType::Text);
    }
    #[test]
    fn test_infer_column_type_out_of_range() {
        let csv = CsvFile::new(vec!["a".into()]);
        assert_eq!(csv.infer_column_type(99), ColumnType::Text);
    }

    // ----- Column selection and header normalization -----
    #[test]
    fn test_select_columns_by_index() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into(), "c".into()]);
        csv.add_record(vec!["1".into(), "2".into(), "3".into()]);
        let subset = csv.select_columns(&[0, 2]);
        assert_eq!(subset.headers, vec!["a", "c"]);
        assert_eq!(subset.records[0].fields, vec!["1", "3"]);
    }
    #[test]
    fn test_select_columns_by_name() {
        let mut csv = CsvFile::new(vec!["time".into(), "x".into(), "y".into()]);
        csv.add_record(vec!["0".into(), "1.0".into(), "2.0".into()]);
        let subset = csv.select_columns_by_name(&["y", "time"]);
        assert_eq!(subset.headers, vec!["y", "time"]);
        assert_eq!(subset.records[0].fields, vec!["2.0", "0"]);
    }
    #[test]
    fn test_select_columns_missing_name_ignored() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record(vec!["1".into(), "2".into()]);
        let subset = csv.select_columns_by_name(&["a", "missing"]);
        assert_eq!(subset.headers, vec!["a"]);
    }
    #[test]
    fn test_normalize_headers() {
        let mut csv = CsvFile::new(vec![
            " Time Step ".into(),
            "X Position".into(),
            "energy (J)".into(),
        ]);
        csv.normalize_headers();
        assert_eq!(csv.headers[0], "time_step");
        assert_eq!(csv.headers[1], "x_position");
        assert_eq!(csv.headers[2], "energy__j_");
    }

    // ----- Column statistics -----
    #[test]
    fn test_column_stats_basic() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[1.0]);
        csv.add_record_f64(&[3.0]);
        csv.add_record_f64(&[5.0]);
        let stats = csv.column_stats(0).unwrap();
        assert!((stats.min - 1.0).abs() < 1e-12);
        assert!((stats.max - 5.0).abs() < 1e-12);
        assert!((stats.mean - 3.0).abs() < 1e-12);
        assert_eq!(stats.count, 3);
        assert!((stats.sum - 9.0).abs() < 1e-12);
    }
    #[test]
    fn test_column_stats_single_value() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[42.0]);
        let stats = csv.column_stats(0).unwrap();
        assert!((stats.min - 42.0).abs() < 1e-12);
        assert!((stats.max - 42.0).abs() < 1e-12);
        assert!((stats.mean - 42.0).abs() < 1e-12);
    }
    #[test]
    fn test_column_stats_negative_values() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[-10.0]);
        csv.add_record_f64(&[-5.0]);
        csv.add_record_f64(&[0.0]);
        let stats = csv.column_stats(0).unwrap();
        assert!((stats.min - (-10.0)).abs() < 1e-12);
        assert!((stats.max - 0.0).abs() < 1e-12);
        assert!((stats.mean - (-5.0)).abs() < 1e-12);
    }
    #[test]
    fn test_column_stats_text_column_returns_none() {
        let mut csv = CsvFile::new(vec!["name".into()]);
        csv.add_record(vec!["alice".into()]);
        assert!(csv.column_stats(0).is_none());
    }
    #[test]
    fn test_all_column_stats() {
        let mut csv = CsvFile::new(vec!["x".into(), "label".into(), "y".into()]);
        csv.add_record(vec!["1.0".into(), "a".into(), "10.0".into()]);
        csv.add_record(vec!["2.0".into(), "b".into(), "20.0".into()]);
        let stats = csv.all_column_stats();
        assert_eq!(stats.len(), 2);
        assert_eq!(stats[0].0, "x");
        assert_eq!(stats[1].0, "y");
    }

    // ----- Delimiter detection and auto-parsing -----
    #[test]
    fn test_detect_delimiter_comma() {
        let s = "a,b,c\n1,2,3\n4,5,6\n";
        assert_eq!(detect_delimiter(s), ',');
    }
    #[test]
    fn test_detect_delimiter_tab() {
        let s = "a\tb\tc\n1\t2\t3\n4\t5\t6\n";
        assert_eq!(detect_delimiter(s), '\t');
    }
    #[test]
    fn test_detect_delimiter_semicolon() {
        let s = "a;b;c\n1;2;3\n4;5;6\n";
        assert_eq!(detect_delimiter(s), ';');
    }
    #[test]
    fn test_detect_delimiter_pipe() {
        let s = "a|b|c\n1|2|3\n";
        assert_eq!(detect_delimiter(s), '|');
    }
    #[test]
    fn test_parse_auto_tab() {
        let s = "time\tvalue\n0.0\t1.0\n1.0\t2.0\n";
        let csv = parse_auto(s).unwrap();
        assert_eq!(csv.headers, vec!["time", "value"]);
        assert_eq!(csv.record_count(), 2);
    }
    #[test]
    fn test_parse_auto_semicolon() {
        let s = "x;y\n1;2\n3;4\n";
        let csv = parse_auto(s).unwrap();
        assert_eq!(csv.headers, vec!["x", "y"]);
        let col = csv.get_column_f64(1).unwrap();
        assert_eq!(col, vec![2.0, 4.0]);
    }

    // ----- Chunked reading -----
    #[test]
    fn test_read_chunked_basic() {
        let s = "x\n1\n2\n3\n4\n5\n";
        let chunks = read_chunked(s, 2);
        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].record_count(), 2);
        assert_eq!(chunks[1].record_count(), 2);
        assert_eq!(chunks[2].record_count(), 1);
    }
    #[test]
    fn test_read_chunked_exact_multiple() {
        let s = "x\n1\n2\n3\n4\n";
        let chunks = read_chunked(s, 2);
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].record_count(), 2);
        assert_eq!(chunks[1].record_count(), 2);
    }
    #[test]
    fn test_read_chunked_larger_than_data() {
        let s = "x\n1\n2\n";
        let chunks = read_chunked(s, 100);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].record_count(), 2);
    }
    #[test]
    fn test_read_chunked_preserves_headers() {
        let s = "a,b\n1,2\n3,4\n5,6\n";
        let chunks = read_chunked(s, 2);
        for chunk in &chunks {
            assert_eq!(chunk.headers, vec!["a", "b"]);
        }
    }

    // ----- Explicit delimiters, typed columns, sorting -----
    #[test]
    fn test_to_string_with_delimiter() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record(vec!["1".into(), "2".into()]);
        let s = csv.to_string_with_delimiter(';');
        assert!(s.starts_with("a;b\n"));
        assert!(s.contains("1;2"));
    }
    #[test]
    fn test_from_str_with_delimiter() {
        let s = "x;y\n1;2\n3;4\n";
        let csv = CsvFile::from_str_with_delimiter(s, ';').unwrap();
        assert_eq!(csv.headers, vec!["x", "y"]);
        assert_eq!(csv.record_count(), 2);
    }
    #[test]
    fn test_get_column_i64() {
        let mut csv = CsvFile::new(vec!["n".into()]);
        csv.add_record(vec!["42".into()]);
        csv.add_record(vec!["-7".into()]);
        let col = csv.get_column_i64(0).unwrap();
        assert_eq!(col, vec![42, -7]);
    }
    #[test]
    fn test_get_column_i64_parse_error() {
        let mut csv = CsvFile::new(vec!["n".into()]);
        csv.add_record(vec!["1.5".into()]);
        assert!(csv.get_column_i64(0).is_err());
    }
    #[test]
    fn test_get_column_strings() {
        let mut csv = CsvFile::new(vec!["name".into()]);
        csv.add_record(vec!["alice".into()]);
        csv.add_record(vec!["bob".into()]);
        let col = csv.get_column_strings(0).unwrap();
        assert_eq!(col, vec!["alice", "bob"]);
    }
    #[test]
    fn test_sort_by_column() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[3.0]);
        csv.add_record_f64(&[1.0]);
        csv.add_record_f64(&[2.0]);
        csv.sort_by_column(0);
        let col = csv.get_column_f64(0).unwrap();
        assert_eq!(col, vec![1.0, 2.0, 3.0]);
    }
    #[test]
    fn test_sort_by_column_already_sorted() {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[1.0]);
        csv.add_record_f64(&[2.0]);
        csv.sort_by_column(0);
        let col = csv.get_column_f64(0).unwrap();
        assert_eq!(col, vec![1.0, 2.0]);
    }
    #[test]
    fn test_normalize_header_fn() {
        assert_eq!(normalize_header(" Time Step "), "time_step");
        assert_eq!(normalize_header("X(m/s)"), "x_m_s_");
        assert_eq!(normalize_header("abc_def"), "abc_def");
    }
    #[test]
    fn test_delimiter_roundtrip_semicolon() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record_f64(&[1.0, 2.0]);
        let s = csv.to_string_with_delimiter(';');
        let parsed = CsvFile::from_str_with_delimiter(&s, ';').unwrap();
        assert_eq!(parsed.headers, vec!["a", "b"]);
        let col = parsed.get_column_f64(0).unwrap();
        assert!((col[0] - 1.0).abs() < 1e-12);
    }

    // ----- Edge cases: empty input / empty selection -----
    #[test]
    fn test_detect_delimiter_empty() {
        assert_eq!(detect_delimiter(""), ',');
    }
    #[test]
    fn test_chunked_empty_input() {
        let chunks = read_chunked("", 5);
        assert!(chunks.is_empty());
    }
    #[test]
    fn test_select_columns_empty_indices() {
        let mut csv = CsvFile::new(vec!["a".into(), "b".into()]);
        csv.add_record(vec!["1".into(), "2".into()]);
        let subset = csv.select_columns(&[]);
        assert!(subset.headers.is_empty());
        assert_eq!(subset.records[0].fields.len(), 0);
    }

    // ----- Schema validation -----
    #[test]
    fn schema_validate_ok() {
        let schema = CsvSchema::new(vec![
            ("x".into(), ColumnType::Float),
            ("label".into(), ColumnType::Text),
        ]);
        let mut csv = CsvFile::new(vec!["x".into(), "label".into()]);
        csv.add_record(vec!["3.14".into(), "hello".into()]);
        let errors = schema.validate(&csv);
        assert!(errors.is_empty(), "unexpected errors: {errors:?}");
    }
    #[test]
    fn schema_validate_type_mismatch() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Integer)]);
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record(vec!["not_an_int".into()]);
        let errors = schema.validate(&csv);
        assert!(!errors.is_empty(), "should report type error");
    }
    #[test]
    fn schema_validate_column_count_mismatch() {
        let schema = CsvSchema::new(vec![
            ("a".into(), ColumnType::Float),
            ("b".into(), ColumnType::Float),
        ]);
        let csv = CsvFile::new(vec!["a".into()]);
        let errors = schema.validate(&csv);
        assert!(!errors.is_empty());
    }
    #[test]
    fn schema_validate_name_mismatch() {
        let schema = CsvSchema::new(vec![("expected".into(), ColumnType::Text)]);
        let csv = CsvFile::new(vec!["actual".into()]);
        let errors = schema.validate(&csv);
        assert!(!errors.is_empty());
    }

    // ----- TimeSeriesCsv -----
    #[test]
    fn time_series_times_extracted() {
        let input = "time,temp\n0.0,300.0\n1.0,301.0\n2.0,302.0\n";
        let ts = TimeSeriesCsv::from_str(input, "time").unwrap();
        let times = ts.times().unwrap();
        assert_eq!(times, vec![0.0, 1.0, 2.0]);
    }
    #[test]
    fn time_series_duration() {
        let input = "time,v\n1.0,0.0\n3.0,1.0\n5.0,2.0\n";
        let ts = TimeSeriesCsv::from_str(input, "time").unwrap();
        assert!((ts.duration() - 4.0).abs() < 1e-10);
    }
    #[test]
    fn time_series_n_steps() {
        let input = "time,v\n0.0,1.0\n0.5,2.0\n";
        let ts = TimeSeriesCsv::from_str(input, "time").unwrap();
        assert_eq!(ts.n_steps(), 2);
    }
    #[test]
    fn time_series_missing_column() {
        let input = "x,y\n1.0,2.0\n";
        let ts = TimeSeriesCsv::from_str(input, "time").unwrap();
        assert!(ts.times().is_none());
    }

    // ----- aggregate_column -----
    // Shared fixture: a single column "v" holding 1.0, 2.0, 3.0, 4.0.
    fn sample_csv() -> CsvFile {
        let mut csv = CsvFile::new(vec!["v".into()]);
        csv.add_record_f64(&[1.0]);
        csv.add_record_f64(&[2.0]);
        csv.add_record_f64(&[3.0]);
        csv.add_record_f64(&[4.0]);
        csv
    }
    #[test]
    fn aggregate_sum() {
        let csv = sample_csv();
        let s = aggregate_column(&csv, 0, AggOp::Sum).unwrap();
        assert!((s - 10.0).abs() < 1e-10);
    }
    #[test]
    fn aggregate_mean() {
        let csv = sample_csv();
        let m = aggregate_column(&csv, 0, AggOp::Mean).unwrap();
        assert!((m - 2.5).abs() < 1e-10);
    }
    #[test]
    fn aggregate_min_max() {
        let csv = sample_csv();
        assert!((aggregate_column(&csv, 0, AggOp::Min).unwrap() - 1.0).abs() < 1e-10);
        assert!((aggregate_column(&csv, 0, AggOp::Max).unwrap() - 4.0).abs() < 1e-10);
    }
    #[test]
    fn aggregate_count() {
        let csv = sample_csv();
        assert!((aggregate_column(&csv, 0, AggOp::Count).unwrap() - 4.0).abs() < 1e-10);
    }
    #[test]
    fn aggregate_std() {
        // Population variance of 1..=4 is 1.25.
        let csv = sample_csv();
        let std = aggregate_column(&csv, 0, AggOp::Std).unwrap();
        assert!((std - 1.25f64.sqrt()).abs() < 1e-9);
    }
    #[test]
    fn aggregate_out_of_range() {
        let csv = sample_csv();
        assert!(aggregate_column(&csv, 99, AggOp::Sum).is_none());
    }

    // ----- CsvWriter -----
    #[test]
    fn csv_writer_basic() {
        let mut w = CsvWriter::new(vec!["x".into(), "y".into()], ',');
        w.write_row(&["1", "2"]);
        w.write_row(&["3", "4"]);
        let s = w.finish();
        assert!(s.starts_with("x,y\n"));
        assert!(s.contains("1,2"));
    }
    #[test]
    fn csv_writer_f64() {
        let mut w = CsvWriter::new(vec!["val".into()], ',');
        w.write_row_f64(&[2.54321]);
        let s = w.finish();
        assert!(s.contains("2.543210"));
    }
    #[test]
    fn csv_writer_row_count() {
        let mut w = CsvWriter::new(vec!["a".into()], ',');
        for _ in 0..5 {
            w.write_row(&["x"]);
        }
        assert_eq!(w.row_count(), 5);
    }
    #[test]
    fn csv_writer_semicolon_delimiter() {
        let mut w = CsvWriter::new(vec!["a".into(), "b".into()], ';');
        w.write_row(&["1", "2"]);
        let s = w.finish();
        assert!(s.contains("a;b"));
        assert!(s.contains("1;2"));
    }

    // ----- validate_csv / CsvValidationReport -----
    #[test]
    fn validation_report_valid() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Float)]);
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record_f64(&[1.0]);
        let report = validate_csv(&csv, &schema);
        assert!(report.is_valid());
        assert_eq!(report.error_count(), 0);
    }
    #[test]
    fn validation_report_invalid() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Integer)]);
        let mut csv = CsvFile::new(vec!["x".into()]);
        csv.add_record(vec!["hello".into()]);
        let report = validate_csv(&csv, &schema);
        assert!(!report.is_valid());
        assert!(report.error_count() > 0);
    }

    // ----- LazyCsvIter -----
    #[test]
    fn lazy_iter_yields_rows() {
        let input = "a,b,c\n1,2,3\n4,5,6\n";
        let mut iter = LazyCsvIter::new(input, ',');
        assert_eq!(iter.headers, vec!["a", "b", "c"]);
        let r1 = iter.next().unwrap();
        assert_eq!(r1, vec!["1", "2", "3"]);
        let r2 = iter.next().unwrap();
        assert_eq!(r2, vec!["4", "5", "6"]);
        assert!(iter.next().is_none());
    }
    #[test]
    fn lazy_iter_empty_input() {
        let mut iter = LazyCsvIter::new("", ',');
        assert!(iter.headers.is_empty());
        assert!(iter.next().is_none());
    }
    #[test]
    fn lazy_iter_header_only() {
        let mut iter = LazyCsvIter::new("x,y\n", ',');
        assert_eq!(iter.headers, vec!["x", "y"]);
        assert!(iter.next().is_none());
    }
    #[test]
    fn lazy_iter_semicolon_delimiter() {
        let input = "a;b\n10;20\n";
        let mut iter = LazyCsvIter::new(input, ';');
        assert_eq!(iter.headers, vec!["a", "b"]);
        let row = iter.next().unwrap();
        assert_eq!(row, vec!["10", "20"]);
    }
}
735/// Write a sequence of trajectory frames to a CSV string.
736///
737/// Format: each frame is written as:
738/// ```text
739/// # frame_title
740/// x,y,z
741/// x,y,z
742/// ...
743/// <blank line>
744/// ```
745#[allow(dead_code)]
746pub fn write_trajectory_csv(frames: &[TrajectoryFrame]) -> String {
747    let mut out = String::new();
748    for frame in frames {
749        if !frame.title.is_empty() {
750            out.push_str(&format!("# {}\n", frame.title));
751        }
752        for pos in &frame.positions {
753            out.push_str(&format!("{},{},{}\n", pos[0], pos[1], pos[2]));
754        }
755        out.push('\n');
756    }
757    out
758}
759/// Read trajectory frames from a CSV string.
760///
761/// Each block of non-blank lines is one frame; lines starting with `#` are
762/// treated as the frame title.  Each data line must have exactly 3
763/// comma-separated values (`x,y,z`). Returns an error string with the line
764/// number on failure.
765#[allow(dead_code)]
766pub fn read_trajectory_csv(s: &str) -> std::result::Result<Vec<TrajectoryFrame>, String> {
767    let mut frames: Vec<TrajectoryFrame> = Vec::new();
768    let mut current = TrajectoryFrame::new();
769    let mut in_frame = false;
770    for (line_no, raw_line) in s.lines().enumerate() {
771        let line = raw_line.trim();
772        if line.is_empty() {
773            if in_frame {
774                frames.push(std::mem::take(&mut current));
775                in_frame = false;
776            }
777            continue;
778        }
779        if line.starts_with('#') {
780            current.title = line.trim_start_matches('#').trim().to_string();
781            in_frame = true;
782            continue;
783        }
784        let parts: Vec<&str> = line.split(',').collect();
785        if parts.len() != 3 {
786            return Err(format!(
787                "line {}: expected 3 comma-separated values, got {}",
788                line_no + 1,
789                parts.len()
790            ));
791        }
792        let x: f64 = parts[0]
793            .trim()
794            .parse()
795            .map_err(|e| format!("line {}: x parse error: {}", line_no + 1, e))?;
796        let y: f64 = parts[1]
797            .trim()
798            .parse()
799            .map_err(|e| format!("line {}: y parse error: {}", line_no + 1, e))?;
800        let z: f64 = parts[2]
801            .trim()
802            .parse()
803            .map_err(|e| format!("line {}: z parse error: {}", line_no + 1, e))?;
804        current.positions.push([x, y, z]);
805        in_frame = true;
806    }
807    if in_frame {
808        frames.push(current);
809    }
810    Ok(frames)
811}
#[cfg(test)]
mod tests_dataframe {
    use super::*;
    use crate::csv_io::types::*;

    /// Asserts that `actual` lies strictly within `tol` of `expected`.
    fn assert_close(actual: f64, expected: f64, tol: f64) {
        assert!((actual - expected).abs() < tol);
    }

    /// Each column gets its own inferred type: integer, float, text.
    #[test]
    fn dataframe_from_csv_types() {
        let frame = CsvDataFrame::from_str("id,x,label\n1,3.14,hello\n2,2.71,world\n").unwrap();
        assert_eq!(frame.n_cols(), 3);
        assert_eq!(frame.n_rows(), 2);
        assert_eq!(frame.column(0).unwrap().column_type(), ColumnType::Integer);
        assert_eq!(frame.column(1).unwrap().column_type(), ColumnType::Float);
        assert_eq!(frame.column(2).unwrap().column_type(), ColumnType::Text);
    }

    /// A float column can be fetched by its header name.
    #[test]
    fn dataframe_float_column_by_name() {
        let frame = CsvDataFrame::from_str("t,v\n0.0,1.5\n1.0,2.5\n").unwrap();
        let values = frame.float_column("v").unwrap();
        assert_eq!(values.len(), 2);
        assert_close(values[0], 1.5, 1e-12);
        assert_close(values[1], 2.5, 1e-12);
    }

    /// An integer column can be fetched by its header name.
    #[test]
    fn dataframe_integer_column_by_name() {
        let frame = CsvDataFrame::from_str("n,label\n10,a\n20,b\n").unwrap();
        let ints = frame.integer_column("n").unwrap();
        assert_eq!(ints, &vec![10_i64, 20_i64]);
    }

    /// A text column can be fetched by its header name.
    #[test]
    fn dataframe_text_column_by_name() {
        let frame = CsvDataFrame::from_str("name,val\nalice,1.0\nbob,2.0\n").unwrap();
        let labels = frame.text_column("name").unwrap();
        assert_eq!(labels, &vec!["alice".to_string(), "bob".to_string()]);
    }

    /// Looking up the index of an unknown header yields `None`.
    #[test]
    fn dataframe_column_index_missing() {
        let frame = CsvDataFrame::from_str("a,b\n1,2\n").unwrap();
        assert!(frame.column_index("nope").is_none());
    }

    /// Serialising and re-parsing a frame preserves its float values.
    #[test]
    fn dataframe_to_csv_string_roundtrip() {
        let original = CsvDataFrame::from_str("x,y\n1.5,2.5\n3.5,4.5\n").unwrap();
        let serialised = original.to_csv_string();
        assert!(serialised.contains("x,y"));
        let reparsed = CsvDataFrame::from_str(&serialised).unwrap();
        let xs = reparsed.float_column("x").unwrap();
        assert_close(xs[0], 1.5, 1e-12);
        assert_close(xs[1], 3.5, 1e-12);
    }

    /// Empty input is rejected.
    #[test]
    fn dataframe_empty_input() {
        assert!(CsvDataFrame::from_str("").is_err());
    }

    /// Row and column counts match the parsed input.
    #[test]
    fn dataframe_n_rows_n_cols() {
        let frame = CsvDataFrame::from_str("a,b,c\n1,2,3\n4,5,6\n7,8,9\n").unwrap();
        assert_eq!(frame.n_rows(), 3);
        assert_eq!(frame.n_cols(), 3);
    }

    /// Fetching a column by an unknown name yields `None`.
    #[test]
    fn dataframe_column_by_name_returns_none_for_missing() {
        let frame = CsvDataFrame::from_str("x\n1.0\n").unwrap();
        assert!(frame.column_by_name("missing").is_none());
    }

    /// A custom delimiter is honoured when parsing.
    #[test]
    fn dataframe_with_delimiter() {
        let frame = CsvDataFrame::from_str_with_delimiter("x;y\n1.0;2.0\n3.0;4.0\n", ';').unwrap();
        assert_eq!(frame.n_cols(), 2);
        let ys = frame.float_column("y").unwrap();
        assert_close(ys[0], 2.0, 1e-12);
    }

    /// Writing two titled frames and reading them back preserves titles and positions.
    #[test]
    fn trajectory_write_read_roundtrip() {
        let first = TrajectoryFrame {
            title: "frame 0".to_string(),
            positions: vec![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
        };
        let second = TrajectoryFrame {
            title: "frame 1".to_string(),
            positions: vec![[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
        };
        let text = write_trajectory_csv(&vec![first, second]);
        let decoded = read_trajectory_csv(&text).unwrap();
        assert_eq!(decoded.len(), 2);
        assert_eq!(decoded[0].title, "frame 0");
        assert_eq!(decoded[0].n_atoms(), 2);
        assert_close(decoded[0].positions[0][0], 1.0, 1e-12);
        assert_close(decoded[1].positions[1][2], 0.6, 1e-12);
    }

    /// Bare coordinate rows without a title form a single untitled frame.
    #[test]
    fn trajectory_single_frame_no_title() {
        let decoded = read_trajectory_csv("1.0,2.0,3.0\n4.0,5.0,6.0\n").unwrap();
        assert_eq!(decoded.len(), 1);
        assert_eq!(decoded[0].n_atoms(), 2);
        assert!(decoded[0].title.is_empty());
    }

    /// Empty input parses successfully to zero frames.
    #[test]
    fn trajectory_empty_input() {
        let decoded = read_trajectory_csv("").unwrap();
        assert!(decoded.is_empty());
    }

    /// A non-numeric coordinate is reported as an error.
    #[test]
    fn trajectory_error_on_bad_value() {
        assert!(read_trajectory_csv("1.0,not_a_float,3.0\n").is_err());
    }

    /// A coordinate row with only two values is reported as an error.
    #[test]
    fn trajectory_error_on_wrong_column_count() {
        assert!(read_trajectory_csv("1.0,2.0\n").is_err());
    }

    /// Positions survive a write/read cycle to within 1e-9.
    #[test]
    fn trajectory_multiple_frames_position_accuracy() {
        let input = vec![TrajectoryFrame {
            title: String::new(),
            positions: vec![[0.123456789, -0.987654321, 1.111111111]],
        }];
        let text = write_trajectory_csv(&input);
        let decoded = read_trajectory_csv(&text).unwrap();
        let atom = decoded[0].positions[0];
        assert_close(atom[0], 0.123456789, 1e-9);
        assert_close(atom[1], -0.987654321, 1e-9);
        assert_close(atom[2], 1.111111111, 1e-9);
    }

    /// `n_atoms` reports the number of stored positions.
    #[test]
    fn trajectory_frame_n_atoms() {
        let frame = TrajectoryFrame {
            title: "t".into(),
            positions: vec![[0.0; 3]; 5],
        };
        assert_eq!(frame.n_atoms(), 5);
    }

    /// Every `CsvColumnData` variant reports its length and matching type.
    #[test]
    fn csv_column_data_len_and_type() {
        let floats = CsvColumnData::Float(vec![1.0, 2.0, 3.0]);
        assert_eq!(floats.len(), 3);
        assert!(!floats.is_empty());
        assert_eq!(floats.column_type(), ColumnType::Float);

        let ints = CsvColumnData::Integer(vec![10, 20]);
        assert_eq!(ints.len(), 2);
        assert_eq!(ints.column_type(), ColumnType::Integer);

        let texts = CsvColumnData::Text(vec!["a".into()]);
        assert_eq!(texts.column_type(), ColumnType::Text);
        assert!(!texts.is_empty());
    }
}
968/// Merge two [`CsvFile`] objects that share the same column schema.
969///
970/// Rows from `other` are appended after the rows of `base`.
971/// Returns `Err` if the header lists differ.
972#[allow(dead_code)]
973pub fn merge_csv_files(base: &CsvFile, other: &CsvFile) -> Result<CsvFile, String> {
974    if base.headers != other.headers {
975        return Err(format!(
976            "header mismatch: {:?} vs {:?}",
977            base.headers, other.headers
978        ));
979    }
980    let mut result = CsvFile::new(base.headers.clone());
981    for rec in &base.records {
982        result.records.push(CsvRecord {
983            fields: rec.fields.clone(),
984        });
985    }
986    for rec in &other.records {
987        result.records.push(CsvRecord {
988            fields: rec.fields.clone(),
989        });
990    }
991    Ok(result)
992}
993/// Transpose a [`CsvFile`]: rows become columns and columns become rows.
994///
995/// The resulting file has column headers `"col_0"`, `"col_1"`, …
996/// (original row indices), and each new row corresponds to an original column.
997#[allow(dead_code)]
998pub fn transpose_csv(csv: &CsvFile) -> CsvFile {
999    let n_rows = csv.records.len();
1000    let n_cols = csv.headers.len();
1001    if n_rows == 0 || n_cols == 0 {
1002        return CsvFile::new(vec![]);
1003    }
1004    let new_headers: Vec<String> = (0..n_rows).map(|i| format!("col_{}", i)).collect();
1005    let mut result = CsvFile::new(new_headers);
1006    for col in 0..n_cols {
1007        let fields: Vec<String> = (0..n_rows)
1008            .map(|row| {
1009                csv.records[row]
1010                    .fields
1011                    .get(col)
1012                    .cloned()
1013                    .unwrap_or_default()
1014            })
1015            .collect();
1016        result.records.push(CsvRecord { fields });
1017    }
1018    result
1019}
1020/// Inner-join two [`CsvFile`]s on a shared key column.
1021///
1022/// Rows are matched when the key column value is equal (string comparison).
1023/// The output contains all columns from `left` followed by the non-key columns
1024/// of `right`.
1025#[allow(dead_code)]
1026pub fn inner_join_csv(left: &CsvFile, right: &CsvFile, key: &str) -> Result<CsvFile, String> {
1027    let left_key_idx = left
1028        .get_column_by_name(key)
1029        .ok_or_else(|| format!("key '{}' not in left file", key))?;
1030    let right_key_idx = right
1031        .get_column_by_name(key)
1032        .ok_or_else(|| format!("key '{}' not in right file", key))?;
1033    let mut headers = left.headers.clone();
1034    for (i, h) in right.headers.iter().enumerate() {
1035        if i != right_key_idx {
1036            headers.push(h.clone());
1037        }
1038    }
1039    let mut result = CsvFile::new(headers);
1040    for l_rec in &left.records {
1041        let l_key = l_rec.fields.get(left_key_idx).cloned().unwrap_or_default();
1042        for r_rec in &right.records {
1043            let r_key = r_rec.fields.get(right_key_idx).cloned().unwrap_or_default();
1044            if l_key == r_key {
1045                let mut fields = l_rec.fields.clone();
1046                for (i, f) in r_rec.fields.iter().enumerate() {
1047                    if i != right_key_idx {
1048                        fields.push(f.clone());
1049                    }
1050                }
1051                result.records.push(CsvRecord { fields });
1052            }
1053        }
1054    }
1055    Ok(result)
1056}
1057/// Compute the per-cell numeric difference between two [`CsvFile`]s.
1058///
1059/// Both files must have the same dimensions and numeric-only columns.
1060/// Returns `Err` on dimension mismatch or parse failure.
1061#[allow(dead_code)]
1062pub fn diff_csv(a: &CsvFile, b: &CsvFile) -> Result<CsvFile, String> {
1063    if a.headers != b.headers {
1064        return Err("header mismatch".to_string());
1065    }
1066    if a.records.len() != b.records.len() {
1067        return Err(format!(
1068            "row count mismatch: {} vs {}",
1069            a.records.len(),
1070            b.records.len()
1071        ));
1072    }
1073    let mut result = CsvFile::new(a.headers.clone());
1074    for (i, (ar, br)) in a.records.iter().zip(b.records.iter()).enumerate() {
1075        let mut fields = Vec::with_capacity(a.headers.len());
1076        for (j, (af, bf)) in ar.fields.iter().zip(br.fields.iter()).enumerate() {
1077            let av: f64 = af
1078                .parse()
1079                .map_err(|_| format!("row {} col {}: not numeric", i, j))?;
1080            let bv: f64 = bf
1081                .parse()
1082                .map_err(|_| format!("row {} col {}: not numeric", i, j))?;
1083            fields.push(format!("{}", av - bv));
1084        }
1085        result.records.push(CsvRecord { fields });
1086    }
1087    Ok(result)
1088}
/// Guess whether the first line of a CSV string is a header row.
///
/// The first line is split on `delim` and each token is trimmed. The line
/// counts as a header when at least one non-empty token fails to parse as
/// `f64`. Tokens that `f64` parsing accepts, such as `inf` or `NaN`, are
/// treated as numeric. Empty input yields `false`.
#[allow(dead_code)]
pub fn has_header(s: &str, delim: char) -> bool {
    match s.lines().next() {
        None => false,
        // Header iff NOT every token is blank-or-numeric.
        Some(first_line) => !first_line
            .split(delim)
            .map(str::trim)
            .all(|token| token.is_empty() || token.parse::<f64>().is_ok()),
    }
}
/// Infer headers for a headerless CSV.
///
/// Returns `"col_0"`, `"col_1"`, … with one name per `delim`-separated field
/// in the first data row. Lines that are empty after trimming whitespace are
/// not data rows and are skipped, so leading blank lines do not produce a
/// bogus single-column header. Input with no data rows yields an empty
/// vector.
#[allow(dead_code)]
pub fn infer_headers(s: &str, delim: char) -> Vec<String> {
    let n = s
        .lines()
        .find(|l| !l.trim().is_empty())
        .map_or(0, |l| l.split(delim).count());
    (0..n).map(|i| format!("col_{}", i)).collect()
}
1114/// Parse a CSV string that may or may not have a header row.
1115///
1116/// If `auto_header` is `true` (default behaviour), header presence is
1117/// inferred using [`has_header`].  If no header is detected, synthetic
1118/// column names `col_0`, `col_1`, … are used.
1119#[allow(dead_code)]
1120pub fn parse_smart(s: &str) -> Result<CsvFile, String> {
1121    let delim = detect_delimiter(s);
1122    if has_header(s, delim) {
1123        CsvFile::from_str_with_delimiter(s, delim)
1124    } else {
1125        let headers = infer_headers(s, delim);
1126        let mut result = CsvFile::new(headers);
1127        for line in s.lines() {
1128            if line.trim().is_empty() {
1129                continue;
1130            }
1131            let fields: Vec<String> = line.split(delim).map(str::trim).map(String::from).collect();
1132            result.add_record(fields);
1133        }
1134        Ok(result)
1135    }
1136}
1137/// Infer whether a column can be interpreted as boolean.
1138///
1139/// Recognised truthy values: `"true"`, `"1"`, `"yes"`, `"on"` (case-insensitive).
1140/// Recognised falsy values: `"false"`, `"0"`, `"no"`, `"off"`.
1141/// Any other value → not boolean.
1142#[allow(dead_code)]
1143pub fn is_boolean_column(csv: &CsvFile, col_idx: usize) -> bool {
1144    let truthy = ["true", "1", "yes", "on"];
1145    let falsy = ["false", "0", "no", "off"];
1146    if col_idx >= csv.headers.len() {
1147        return false;
1148    }
1149    for rec in &csv.records {
1150        if let Some(v) = rec.fields.get(col_idx) {
1151            let lower = v.trim().to_lowercase();
1152            if !truthy.contains(&lower.as_str()) && !falsy.contains(&lower.as_str()) {
1153                return false;
1154            }
1155        }
1156    }
1157    true
1158}
1159/// Parse a boolean column as `Vec`bool`.
1160///
1161/// Returns `Err` if any value is not recognisable as boolean.
1162#[allow(dead_code)]
1163pub fn get_column_bool(csv: &CsvFile, col_idx: usize) -> Result<Vec<bool>, String> {
1164    let truthy = ["true", "1", "yes", "on"];
1165    let falsy = ["false", "0", "no", "off"];
1166    if col_idx >= csv.headers.len() {
1167        return Err(format!("column index {} out of range", col_idx));
1168    }
1169    let mut out = Vec::with_capacity(csv.records.len());
1170    for (row, rec) in csv.records.iter().enumerate() {
1171        let raw = rec
1172            .fields
1173            .get(col_idx)
1174            .ok_or_else(|| format!("row {} has no field at col {}", row, col_idx))?;
1175        let lower = raw.trim().to_lowercase();
1176        if truthy.contains(&lower.as_str()) {
1177            out.push(true);
1178        } else if falsy.contains(&lower.as_str()) {
1179            out.push(false);
1180        } else {
1181            return Err(format!("row {}: '{}' is not a boolean value", row, raw));
1182        }
1183    }
1184    Ok(out)
1185}
1186/// Sample every `n`-th row from a [`CsvFile`].
1187///
1188/// The header is preserved. `stride = 1` returns all rows unchanged.
1189#[allow(dead_code)]
1190pub fn sample_every_nth(csv: &CsvFile, stride: usize) -> CsvFile {
1191    if stride == 0 {
1192        return CsvFile::new(csv.headers.clone());
1193    }
1194    let mut result = CsvFile::new(csv.headers.clone());
1195    for (i, rec) in csv.records.iter().enumerate() {
1196        if i % stride == 0 {
1197            result.records.push(CsvRecord {
1198                fields: rec.fields.clone(),
1199            });
1200        }
1201    }
1202    result
1203}
1204/// Deduplicate rows in a [`CsvFile`] based on a key column (keep first occurrence).
1205#[allow(dead_code)]
1206pub fn dedup_by_column(csv: &CsvFile, col_idx: usize) -> CsvFile {
1207    use std::collections::HashSet;
1208    let mut seen: HashSet<String> = HashSet::new();
1209    let mut result = CsvFile::new(csv.headers.clone());
1210    for rec in &csv.records {
1211        let key = rec.fields.get(col_idx).cloned().unwrap_or_default();
1212        if seen.insert(key) {
1213            result.records.push(CsvRecord {
1214                fields: rec.fields.clone(),
1215            });
1216        }
1217    }
1218    result
1219}
#[cfg(test)]
mod tests_csv_new {
    use super::*;
    use crate::csv_io::types::*;

    /// Converts a slice of string literals into owned `String`s.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|&item| item.to_owned()).collect()
    }

    /// Asserts that `actual` lies strictly within `tol` of `expected`.
    fn assert_close(actual: f64, expected: f64, tol: f64) {
        assert!((actual - expected).abs() < tol);
    }

    // --- StreamingCsvReader -------------------------------------------------

    /// The first line is exposed as the header list.
    #[test]
    fn streaming_reader_headers() {
        let reader = StreamingCsvReader::new("time,x,y\n0.0,1.0,2.0\n1.0,3.0,4.0\n", ',');
        assert_eq!(reader.headers, vec!["time", "x", "y"]);
        assert_eq!(reader.n_cols(), 3);
    }

    /// Rows are returned one at a time and the row counter advances.
    #[test]
    fn streaming_reader_next_row() {
        let mut reader = StreamingCsvReader::new("a,b\n1,2\n3,4\n", ',');
        let first = reader.next_row().unwrap();
        assert_eq!(first, vec!["1", "2"]);
        assert_eq!(reader.current_row(), 1);
        let second = reader.next_row().unwrap();
        assert_eq!(second, vec!["3", "4"]);
    }

    /// After the last row the reader yields `None`.
    #[test]
    fn streaming_reader_eof() {
        let mut reader = StreamingCsvReader::new("a\n1\n", ',');
        reader.next_row();
        assert!(reader.next_row().is_none());
    }

    /// Blank lines between data rows are skipped.
    #[test]
    fn streaming_reader_skips_blank_lines() {
        let mut reader = StreamingCsvReader::new("a\n1\n\n2\n", ',');
        reader.next_row();
        let after_blank = reader.next_row().unwrap();
        assert_eq!(after_blank, vec!["2"]);
    }

    /// `collect_all` drains the reader into a `CsvFile`.
    #[test]
    fn streaming_reader_collect_all() {
        let reader = StreamingCsvReader::new("x,y\n1,2\n3,4\n5,6\n", ',');
        let collected = reader.collect_all();
        assert_eq!(collected.record_count(), 3);
        assert_eq!(collected.headers, vec!["x", "y"]);
    }

    /// `auto` picks the tab delimiter for tab-separated input.
    #[test]
    fn streaming_reader_auto_delimiter() {
        let reader = StreamingCsvReader::auto("a\tb\tc\n1\t2\t3\n");
        assert_eq!(reader.delimiter, '\t');
        assert_eq!(reader.headers, vec!["a", "b", "c"]);
    }

    // --- merge / transpose / join / diff -----------------------------------

    /// Merging appends the rows of both files.
    #[test]
    fn merge_csv_files_basic() {
        let mut base = CsvFile::new(strings(&["x"]));
        base.add_record_f64(&[1.0]);
        let mut extra = CsvFile::new(strings(&["x"]));
        extra.add_record_f64(&[2.0]);
        extra.add_record_f64(&[3.0]);
        let merged = merge_csv_files(&base, &extra).unwrap();
        assert_eq!(merged.record_count(), 3);
    }

    /// Merging files with different headers fails.
    #[test]
    fn merge_csv_files_header_mismatch() {
        let base = CsvFile::new(strings(&["x"]));
        let extra = CsvFile::new(strings(&["y"]));
        assert!(merge_csv_files(&base, &extra).is_err());
    }

    /// A 2x2 file is transposed cell by cell.
    #[test]
    fn transpose_basic() {
        let mut grid = CsvFile::new(strings(&["a", "b"]));
        grid.add_record(strings(&["1", "2"]));
        grid.add_record(strings(&["3", "4"]));
        let flipped = transpose_csv(&grid);
        assert_eq!(flipped.headers.len(), 2);
        assert_eq!(flipped.record_count(), 2);
        assert_eq!(flipped.records[0].fields, vec!["1", "3"]);
        assert_eq!(flipped.records[1].fields, vec!["2", "4"]);
    }

    /// Transposing an empty file yields a file without headers.
    #[test]
    fn transpose_empty() {
        let empty = CsvFile::new(Vec::new());
        let flipped = transpose_csv(&empty);
        assert!(flipped.headers.is_empty());
    }

    /// Only rows whose key appears on both sides are joined.
    #[test]
    fn inner_join_basic() {
        let mut people = CsvFile::new(strings(&["id", "name"]));
        people.add_record(strings(&["1", "alice"]));
        people.add_record(strings(&["2", "bob"]));
        let mut scores = CsvFile::new(strings(&["id", "score"]));
        scores.add_record(strings(&["1", "90"]));
        scores.add_record(strings(&["3", "80"]));
        let joined = inner_join_csv(&people, &scores, "id").unwrap();
        assert_eq!(joined.record_count(), 1);
        assert_eq!(joined.headers, vec!["id", "name", "score"]);
        assert_eq!(joined.records[0].fields[1], "alice");
        assert_eq!(joined.records[0].fields[2], "90");
    }

    /// Joining on a key that neither file has fails.
    #[test]
    fn inner_join_missing_key() {
        let lhs = CsvFile::new(strings(&["a"]));
        let rhs = CsvFile::new(strings(&["b"]));
        assert!(inner_join_csv(&lhs, &rhs, "id").is_err());
    }

    /// Differences are computed cell by cell as `a - b`.
    #[test]
    fn diff_csv_basic() {
        let mut minuend = CsvFile::new(strings(&["v"]));
        minuend.add_record(strings(&["5.0"]));
        minuend.add_record(strings(&["3.0"]));
        let mut subtrahend = CsvFile::new(strings(&["v"]));
        subtrahend.add_record(strings(&["1.0"]));
        subtrahend.add_record(strings(&["1.0"]));
        let delta = diff_csv(&minuend, &subtrahend).unwrap();
        let column = delta.get_column_f64(0).unwrap();
        assert_close(column[0], 4.0, 1e-12);
        assert_close(column[1], 2.0, 1e-12);
    }

    /// Files with different row counts cannot be diffed.
    #[test]
    fn diff_csv_row_count_mismatch() {
        let mut one_row = CsvFile::new(strings(&["v"]));
        one_row.add_record(strings(&["1.0"]));
        let no_rows = CsvFile::new(strings(&["v"]));
        assert!(diff_csv(&one_row, &no_rows).is_err());
    }

    // --- header detection and smart parsing --------------------------------

    /// A textual first line is recognised as a header.
    #[test]
    fn has_header_true() {
        assert!(has_header("time,x,y\n0,1,2\n", ','));
    }

    /// A purely numeric first line is not a header.
    #[test]
    fn has_header_false_all_numbers() {
        assert!(!has_header("0,1,2\n3,4,5\n", ','));
    }

    /// One synthetic header is produced per field of the first row.
    #[test]
    fn infer_headers_count() {
        let inferred = infer_headers("1,2,3,4\n", ',');
        assert_eq!(inferred, vec!["col_0", "col_1", "col_2", "col_3"]);
    }

    /// With a header row, the header names are taken from the input.
    #[test]
    fn parse_smart_with_header() {
        let parsed = parse_smart("a,b\n1,2\n3,4\n").unwrap();
        assert_eq!(parsed.headers, vec!["a", "b"]);
        assert_eq!(parsed.record_count(), 2);
    }

    /// Without a header row, synthetic names are used and all lines are data.
    #[test]
    fn parse_smart_without_header() {
        let parsed = parse_smart("1,2\n3,4\n").unwrap();
        assert_eq!(parsed.headers, vec!["col_0", "col_1"]);
        assert_eq!(parsed.record_count(), 2);
    }

    // --- boolean columns ----------------------------------------------------

    /// A column of recognised tokens is detected as boolean.
    #[test]
    fn boolean_column_detection() {
        let mut flags = CsvFile::new(strings(&["flag"]));
        for token in ["true", "false", "yes"].iter() {
            flags.add_record(strings(&[*token]));
        }
        assert!(is_boolean_column(&flags, 0));
    }

    /// A single unrecognised token disqualifies the column.
    #[test]
    fn boolean_column_rejection() {
        let mut flags = CsvFile::new(strings(&["v"]));
        flags.add_record(strings(&["true"]));
        flags.add_record(strings(&["maybe"]));
        assert!(!is_boolean_column(&flags, 0));
    }

    /// Tokens are converted to the matching `bool` values in row order.
    #[test]
    fn get_column_bool_values() {
        let mut flags = CsvFile::new(strings(&["b"]));
        for token in ["1", "0", "yes", "no"].iter() {
            flags.add_record(strings(&[*token]));
        }
        let parsed = get_column_bool(&flags, 0).unwrap();
        assert_eq!(parsed, vec![true, false, true, false]);
    }

    /// An unrecognised token makes the conversion fail.
    #[test]
    fn get_column_bool_error_on_bad() {
        let mut flags = CsvFile::new(strings(&["b"]));
        flags.add_record(strings(&["maybe"]));
        assert!(get_column_bool(&flags, 0).is_err());
    }

    // --- sampling and deduplication ----------------------------------------

    /// A stride of 3 over 10 rows keeps rows 0, 3, 6 and 9.
    #[test]
    fn sample_every_nth_basic() {
        let mut table = CsvFile::new(strings(&["v"]));
        for i in 0..10_usize {
            table.add_record(vec![i.to_string()]);
        }
        let sampled = sample_every_nth(&table, 3);
        assert_eq!(sampled.record_count(), 4);
    }

    /// A stride of 1 keeps every row.
    #[test]
    fn sample_every_nth_stride_one() {
        let mut table = CsvFile::new(strings(&["v"]));
        table.add_record_f64(&[1.0]);
        table.add_record_f64(&[2.0]);
        let sampled = sample_every_nth(&table, 1);
        assert_eq!(sampled.record_count(), 2);
    }

    /// A stride of 0 keeps no rows.
    #[test]
    fn sample_every_nth_zero_stride() {
        let mut table = CsvFile::new(strings(&["v"]));
        table.add_record_f64(&[1.0]);
        let sampled = sample_every_nth(&table, 0);
        assert_eq!(sampled.record_count(), 0);
    }

    /// The first row for each key wins.
    #[test]
    fn dedup_by_column_basic() {
        let mut table = CsvFile::new(strings(&["id", "val"]));
        table.add_record(strings(&["1", "a"]));
        table.add_record(strings(&["2", "b"]));
        table.add_record(strings(&["1", "c"]));
        let unique = dedup_by_column(&table, 0);
        assert_eq!(unique.record_count(), 2);
        assert_eq!(unique.records[0].fields[1], "a");
    }

    /// Rows with distinct keys are all kept.
    #[test]
    fn dedup_by_column_all_unique() {
        let mut table = CsvFile::new(strings(&["id"]));
        for i in 0..5_usize {
            table.add_record(vec![i.to_string()]);
        }
        let unique = dedup_by_column(&table, 0);
        assert_eq!(unique.record_count(), 5);
    }

    // --- LazyCsvIter --------------------------------------------------------

    /// The iterator yields each data row in order and then `None`.
    #[test]
    fn lazy_iter_yields_correct_rows() {
        let mut rows = LazyCsvIter::new("x,y\n1,2\n3,4\n5,6\n", ',');
        assert_eq!(rows.headers, vec!["x", "y"]);
        let first = rows.next().unwrap();
        assert_eq!(first, vec!["1", "2"]);
        let second = rows.next().unwrap();
        assert_eq!(second, vec!["3", "4"]);
        let third = rows.next().unwrap();
        assert_eq!(third, vec!["5", "6"]);
        assert!(rows.next().is_none());
    }

    /// A tab delimiter splits both headers and rows.
    #[test]
    fn lazy_iter_tab_delimiter() {
        let mut rows = LazyCsvIter::new("a\tb\n10\t20\n", '\t');
        assert_eq!(rows.headers, vec!["a", "b"]);
        let only = rows.next().unwrap();
        assert_eq!(only[0], "10");
        assert_eq!(only[1], "20");
    }

    // --- schema validation and header normalisation ------------------------

    /// A float value satisfies a float schema.
    #[test]
    fn validation_report_is_valid() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Float)]);
        let mut table = CsvFile::new(strings(&["x"]));
        table.add_record(strings(&["3.14"]));
        let report = validate_csv(&table, &schema);
        assert!(report.is_valid());
        assert_eq!(report.error_count(), 0);
    }

    /// A non-integer value violates an integer schema.
    #[test]
    fn validation_report_has_errors() {
        let schema = CsvSchema::new(vec![("x".into(), ColumnType::Integer)]);
        let mut table = CsvFile::new(strings(&["x"]));
        table.add_record(strings(&["not_int"]));
        let report = validate_csv(&table, &schema);
        assert!(!report.is_valid());
        assert!(report.error_count() > 0);
    }

    /// Special characters become underscores and letters are lower-cased.
    #[test]
    fn normalize_header_special_chars() {
        assert_eq!(normalize_header("E (J/mol)"), "e__j_mol_");
    }

    /// An already clean header is returned unchanged.
    #[test]
    fn normalize_header_already_clean() {
        assert_eq!(normalize_header("velocity_x"), "velocity_x");
    }
}