1use crate::Error;
6use std::collections::HashMap;
7
8use super::types::{ColumnType, CsvChangedRow, CsvDiff, CsvTable, PivotAgg};
9
10pub(super) fn split_csv_line(line: &str, delim: char) -> Vec<String> {
12 let mut fields = Vec::new();
13 let mut current = String::new();
14 let mut in_quotes = false;
15 let mut chars = line.chars().peekable();
16 while let Some(ch) = chars.next() {
17 if ch == '"' {
18 if in_quotes {
19 if chars.peek() == Some(&'"') {
20 chars.next();
21 current.push('"');
22 } else {
23 in_quotes = false;
24 }
25 } else {
26 in_quotes = true;
27 }
28 } else if ch == delim && !in_quotes {
29 fields.push(current.clone());
30 current.clear();
31 } else {
32 current.push(ch);
33 }
34 }
35 fields.push(current);
36 fields
37}
38pub(super) fn quote_field(field: &str, delimiter: char) -> String {
40 if field.contains(delimiter)
41 || field.contains('"')
42 || field.contains('\n')
43 || field.contains('\r')
44 {
45 let escaped = field.replace('"', "\"\"");
46 format!("\"{}\"", escaped)
47 } else {
48 field.to_string()
49 }
50}
51#[allow(dead_code)]
55pub fn infer_column_type(values: &[&str]) -> ColumnType {
56 let non_empty: Vec<&str> = values
57 .iter()
58 .copied()
59 .filter(|s| !s.trim().is_empty())
60 .collect();
61 if non_empty.is_empty() {
62 return ColumnType::Empty;
63 }
64 let all_int = non_empty.iter().all(|s| s.trim().parse::<i64>().is_ok());
65 if all_int {
66 return ColumnType::Integer;
67 }
68 let all_float = non_empty.iter().all(|s| s.trim().parse::<f64>().is_ok());
69 if all_float {
70 return ColumnType::Float;
71 }
72 let bool_values = ["true", "false", "yes", "no", "1", "0"];
73 let all_bool = non_empty
74 .iter()
75 .all(|s| bool_values.contains(&s.trim().to_lowercase().as_str()));
76 if all_bool {
77 return ColumnType::Boolean;
78 }
79 ColumnType::Text
80}
81#[allow(dead_code)]
85pub fn infer_table_types(table: &CsvTable) -> Vec<(String, ColumnType)> {
86 table
87 .headers
88 .iter()
89 .enumerate()
90 .map(|(i, name)| {
91 let values: Vec<&str> = table.rows.iter().map(|r| r[i].as_str()).collect();
92 let typ = infer_column_type(&values);
93 (name.clone(), typ)
94 })
95 .collect()
96}
97#[allow(dead_code)]
112pub fn csv_merge(left: &CsvTable, right: &CsvTable) -> std::result::Result<CsvTable, Error> {
113 if left.headers != right.headers {
114 return Err(Error::Parse(format!(
115 "csv_merge: header mismatch: {:?} vs {:?}",
116 left.headers, right.headers
117 )));
118 }
119 let mut result = left.clone();
120 result.rows.extend(right.rows.iter().cloned());
121 Ok(result)
122}
123#[allow(dead_code)]
143pub fn csv_join(
144 left: &CsvTable,
145 right: &CsvTable,
146 key: &str,
147) -> std::result::Result<CsvTable, Error> {
148 let left_key_idx = left.column_index(key)?;
149 let right_key_idx = right.column_index(key)?;
150 let mut right_map: HashMap<String, Vec<Vec<String>>> = HashMap::new();
151 for row in &right.rows {
152 right_map
153 .entry(row[right_key_idx].clone())
154 .or_default()
155 .push(row.clone());
156 }
157 let right_extra_headers: Vec<String> = right
158 .headers
159 .iter()
160 .enumerate()
161 .filter(|(i, _)| *i != right_key_idx)
162 .map(|(_, h)| h.clone())
163 .collect();
164 let right_extra_indices: Vec<usize> = right
165 .headers
166 .iter()
167 .enumerate()
168 .filter(|(i, _)| *i != right_key_idx)
169 .map(|(i, _)| i)
170 .collect();
171 let mut result_headers = left.headers.clone();
172 result_headers.extend(right_extra_headers);
173 let mut result_rows = Vec::new();
174 for left_row in &left.rows {
175 let key_val = &left_row[left_key_idx];
176 if let Some(right_rows) = right_map.get(key_val) {
177 for right_row in right_rows {
178 let mut merged = left_row.clone();
179 for &ri in &right_extra_indices {
180 merged.push(right_row[ri].clone());
181 }
182 result_rows.push(merged);
183 }
184 }
185 }
186 Ok(CsvTable {
187 headers: result_headers,
188 rows: result_rows,
189 })
190}
191#[allow(dead_code)]
215pub fn csv_pivot(
216 table: &CsvTable,
217 row_col: &str,
218 col_col: &str,
219 val_col: &str,
220 agg: PivotAgg,
221) -> std::result::Result<CsvTable, Error> {
222 let row_idx = table.column_index(row_col)?;
223 let col_idx = table.column_index(col_col)?;
224 let val_idx = table.column_index(val_col)?;
225 let mut row_keys: Vec<String> = Vec::new();
226 let mut col_keys: Vec<String> = Vec::new();
227 for row in &table.rows {
228 let rk = row[row_idx].clone();
229 if !row_keys.contains(&rk) {
230 row_keys.push(rk);
231 }
232 let ck = row[col_idx].clone();
233 if !col_keys.contains(&ck) {
234 col_keys.push(ck);
235 }
236 }
237 let mut buckets: HashMap<(String, String), Vec<f64>> = HashMap::new();
238 for row in &table.rows {
239 let rk = row[row_idx].clone();
240 let ck = row[col_idx].clone();
241 let vs = row[val_idx].trim();
242 if let Ok(v) = vs.parse::<f64>() {
243 buckets.entry((rk, ck)).or_default().push(v);
244 }
245 }
246 let mut headers = vec![row_col.to_string()];
247 headers.extend(col_keys.iter().cloned());
248 let rows: Vec<Vec<String>> = row_keys
249 .iter()
250 .map(|rk| {
251 let mut row_out = vec![rk.clone()];
252 for ck in &col_keys {
253 let cell = if let Some(vals) = buckets.get(&(rk.clone(), ck.clone())) {
254 let agg_val = match agg {
255 PivotAgg::Sum => vals.iter().sum::<f64>(),
256 PivotAgg::Mean => vals.iter().sum::<f64>() / vals.len() as f64,
257 PivotAgg::Count => vals.len() as f64,
258 PivotAgg::Min => vals.iter().cloned().fold(f64::INFINITY, f64::min),
259 PivotAgg::Max => vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max),
260 };
261 format!("{}", agg_val)
262 } else {
263 String::new()
264 };
265 row_out.push(cell);
266 }
267 row_out
268 })
269 .collect();
270 Ok(CsvTable { headers, rows })
271}
272#[allow(dead_code)]
294pub fn csv_diff(
295 left: &CsvTable,
296 right: &CsvTable,
297 key_col: &str,
298) -> std::result::Result<CsvDiff, Error> {
299 if left.headers != right.headers {
300 return Err(Error::Parse(format!(
301 "csv_diff: header mismatch: {:?} vs {:?}",
302 left.headers, right.headers
303 )));
304 }
305 let key_idx = left.column_index(key_col)?;
306 let left_map: HashMap<String, &Vec<String>> =
307 left.rows.iter().map(|r| (r[key_idx].clone(), r)).collect();
308 let right_map: HashMap<String, &Vec<String>> =
309 right.rows.iter().map(|r| (r[key_idx].clone(), r)).collect();
310 let mut removed = Vec::new();
311 let mut changed = Vec::new();
312 for (key, left_row) in &left_map {
313 match right_map.get(key) {
314 None => removed.push((*left_row).clone()),
315 Some(right_row) => {
316 if left_row != right_row {
317 changed.push(CsvChangedRow {
318 key: key.clone(),
319 before: (*left_row).clone(),
320 after: (*right_row).clone(),
321 });
322 }
323 }
324 }
325 }
326 let added: Vec<Vec<String>> = right_map
327 .iter()
328 .filter(|(key, _)| !left_map.contains_key(*key))
329 .map(|(_, row)| (*row).clone())
330 .collect();
331 Ok(CsvDiff {
332 removed,
333 added,
334 changed,
335 })
336}
// Unit tests for the CSV readers, writers, parsers and the table helpers
// defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::CsvReader;
    use crate::CsvWriter;
    use crate::csv::ConfigurableCsvWriter;
    use crate::csv::CsvParser;
    use crate::csv::CsvRecord;
    use crate::csv::CsvStreamParser;
    use crate::csv::CsvWriterConfig;
    use crate::csv::InMemoryCsvReader;
    use crate::csv::InMemoryCsvWriter;
    use crate::csv::TypedCsvReader;
    use crate::csv::types::*;
    use std::fs::File;
    use std::io::Write;

    /// Returns a scratch-file path inside the platform temp directory.
    ///
    /// The process id is part of the file name so concurrent test runs do not
    /// share files, and no Unix-only `/tmp` location is assumed.
    fn temp_csv_path(file_name: &str) -> String {
        std::env::temp_dir()
            .join(format!("{}_{}", std::process::id(), file_name))
            .to_str()
            .expect("temp dir path should be valid UTF-8")
            .to_owned()
    }

    // ---- file-based writer / reader ----

    #[test]
    fn test_csv_write_and_read_roundtrip() {
        let path_buf = temp_csv_path("oxiphy_test.csv");
        let path = path_buf.as_str();
        {
            let mut w = CsvWriter::new(path, &["time", "energy", "temperature"]).unwrap();
            w.write_row(&[0.0, 100.0, 300.0]).unwrap();
            w.write_row(&[1.0, 99.5, 299.8]).unwrap();
            w.write_row(&[2.0, 99.0, 299.5]).unwrap();
        }
        let (headers, rows) = CsvReader::read(path).unwrap();
        assert_eq!(headers, vec!["time", "energy", "temperature"]);
        assert_eq!(rows.len(), 3);
        assert!((rows[0][0] - 0.0).abs() < 1e-10);
        assert!((rows[2][2] - 299.5).abs() < 1e-10);
        std::fs::remove_file(path).ok();
    }

    #[test]
    fn test_csv_write_read() {
        let path_buf = temp_csv_path("oxiphy_test_positions.csv");
        let path = path_buf.as_str();
        let particle_positions: Vec<[f64; 3]> = vec![
            [1.0, 2.0, 3.0],
            [4.5, 5.5, 6.5],
            [-1.0, -2.0, -3.0],
            [0.0, 0.0, 0.0],
        ];
        {
            let mut w = CsvWriter::new(path, &["x", "y", "z"]).unwrap();
            for pos in &particle_positions {
                w.write_row(pos).unwrap();
            }
        }
        let (headers, rows) = CsvReader::read(path).unwrap();
        assert_eq!(headers, vec!["x", "y", "z"]);
        assert_eq!(rows.len(), 4, "expected 4 rows");
        for (i, expected) in particle_positions.iter().enumerate() {
            assert!(
                (rows[i][0] - expected[0]).abs() < 1e-10,
                "row {} x mismatch: {} vs {}",
                i,
                rows[i][0],
                expected[0]
            );
            assert!(
                (rows[i][1] - expected[1]).abs() < 1e-10,
                "row {} y mismatch: {} vs {}",
                i,
                rows[i][1],
                expected[1]
            );
            assert!(
                (rows[i][2] - expected[2]).abs() < 1e-10,
                "row {} z mismatch: {} vs {}",
                i,
                rows[i][2],
                expected[2]
            );
        }
        std::fs::remove_file(path).ok();
    }

    #[test]
    fn test_csv_with_empty_data() {
        let path_buf = temp_csv_path("oxiphy_test_empty.csv");
        let path = path_buf.as_str();
        {
            let _w = CsvWriter::new(path, &["x", "y"]).unwrap();
        }
        let (headers, rows) = CsvReader::read(path).unwrap();
        assert_eq!(headers, vec!["x", "y"]);
        assert!(rows.is_empty());
        std::fs::remove_file(path).ok();
    }

    // ---- in-memory writer ----

    #[test]
    fn test_in_memory_writer_header() {
        let w = InMemoryCsvWriter::new(&["x", "y", "z"], ',');
        assert_eq!(w.write_header(), "x,y,z");
    }

    #[test]
    fn test_in_memory_writer_tab_delimiter() {
        let w = InMemoryCsvWriter::new(&["a", "b"], '\t');
        assert_eq!(w.write_header(), "a\tb");
        let row = w.write_row(&[1.0, 2.0]).unwrap();
        assert!(row.contains('\t'));
    }

    #[test]
    fn test_in_memory_writer_row_wrong_len() {
        let w = InMemoryCsvWriter::new(&["x", "y"], ',');
        assert!(w.write_row(&[1.0]).is_err());
    }

    #[test]
    fn test_in_memory_writer_write_all() {
        let w = InMemoryCsvWriter::new(&["t", "v"], ',').with_precision(2);
        let rows = vec![vec![0.0, 1.0], vec![1.0, 2.5]];
        let out = w.write_all(&rows);
        assert!(out.starts_with("t,v\n"));
        assert!(out.contains("0.00"));
        assert!(out.contains("2.50"));
    }

    #[test]
    fn test_in_memory_writer_precision() {
        let w = InMemoryCsvWriter::new(&["v"], ',').with_precision(3);
        let row = w.write_row(&[2.54321]).unwrap();
        assert_eq!(row, "2.543");
    }

    // ---- in-memory reader ----

    #[test]
    fn test_in_memory_reader_basic() {
        let data = "x,y,z\n1.0,2.0,3.0\n4.0,5.0,6.0\n";
        let reader = InMemoryCsvReader::from_str(data).unwrap();
        assert_eq!(reader.get_row_count(), 2);
        let x = reader.get_column_f64("x").unwrap();
        assert!((x[0] - 1.0).abs() < 1e-10);
        assert!((x[1] - 4.0).abs() < 1e-10);
    }

    #[test]
    fn test_in_memory_reader_missing_values() {
        let data = "a,b\n1.0,\n,3.0\n";
        let reader = InMemoryCsvReader::from_str(data).unwrap();
        let a = reader.get_column_f64("a").unwrap();
        let b = reader.get_column_f64("b").unwrap();
        assert!((a[0] - 1.0).abs() < 1e-10);
        assert!(a[1].is_nan());
        assert!(b[0].is_nan());
        assert!((b[1] - 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_in_memory_reader_comment_lines() {
        let data = "# This is a comment\nx,y\n# Another comment\n1.0,2.0\n3.0,4.0\n";
        let reader = InMemoryCsvReader::from_str(data).unwrap();
        assert_eq!(reader.get_row_count(), 2);
        assert_eq!(reader.headers(), &["x", "y"]);
    }

    #[test]
    fn test_in_memory_reader_semicolon_delimiter() {
        let data = "a;b;c\n1.0;2.0;3.0\n";
        let reader = InMemoryCsvReader::parse_with_delimiter(data, ';').unwrap();
        let b = reader.get_column_f64("b").unwrap();
        assert!((b[0] - 2.0).abs() < 1e-10);
    }

    #[test]
    fn test_in_memory_reader_empty_input() {
        assert!(InMemoryCsvReader::from_str("").is_err());
        assert!(InMemoryCsvReader::from_str("# only comments\n").is_err());
    }

    #[test]
    fn test_in_memory_reader_column_not_found() {
        let data = "x,y\n1.0,2.0\n";
        let reader = InMemoryCsvReader::from_str(data).unwrap();
        assert!(reader.get_column_f64("z").is_err());
    }

    #[test]
    fn test_column_stats() {
        let data = "v\n1.0\n2.0\n3.0\n4.0\n5.0\n";
        let reader = InMemoryCsvReader::from_str(data).unwrap();
        let (min, max, mean, std) = reader.column_stats("v").unwrap();
        assert!((min - 1.0).abs() < 1e-10);
        assert!((max - 5.0).abs() < 1e-10);
        assert!((mean - 3.0).abs() < 1e-10);
        assert!((std - 2.0_f64.sqrt()).abs() < 1e-10);
    }

    #[test]
    fn test_column_stats_with_missing() {
        let data = "v\n2.0\n\n4.0\n";
        let reader = InMemoryCsvReader::from_str(data).unwrap();
        let (min, max, mean, _std) = reader.column_stats("v").unwrap();
        assert!((min - 2.0).abs() < 1e-10);
        assert!((max - 4.0).abs() < 1e-10);
        assert!((mean - 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_write_read_roundtrip_in_memory() {
        let writer = InMemoryCsvWriter::new(&["time", "energy"], ',').with_precision(4);
        let rows = vec![vec![0.0, 100.0], vec![0.5, 99.5], vec![1.0, 99.0]];
        let csv_str = writer.write_all(&rows);
        let reader = InMemoryCsvReader::from_str(&csv_str).unwrap();
        assert_eq!(reader.get_row_count(), 3);
        let time = reader.get_column_f64("time").unwrap();
        let energy = reader.get_column_f64("energy").unwrap();
        assert!((time[1] - 0.5).abs() < 1e-3);
        assert!((energy[2] - 99.0).abs() < 1e-3);
    }

    #[test]
    fn test_quoted_fields() {
        let data = "name,value\n\"hello,world\",42.0\n";
        let reader = InMemoryCsvReader::from_str(data).unwrap();
        assert_eq!(reader.get_row_count(), 1);
        let value = reader.get_column_f64("value").unwrap();
        assert!((value[0] - 42.0).abs() < 1e-10);
    }

    // ---- CsvParser ----

    #[test]
    fn test_csv_parser_basic() {
        let data = "name,age\nAlice,30\nBob,25\n";
        let parser = CsvParser::new(data, ',');
        let records = parser.parse_all().unwrap();
        assert_eq!(records.len(), 3);
        assert_eq!(records[0].get(0), "name");
        assert_eq!(records[1].get(0), "Alice");
        assert_eq!(records[2].get(1), "25");
    }

    #[test]
    fn test_csv_parser_quoted_with_delimiter() {
        let data = "a,b\n\"x,y\",2\n";
        let parser = CsvParser::new(data, ',');
        let records = parser.parse_all().unwrap();
        assert_eq!(records[1].get(0), "x,y");
        assert_eq!(records[1].get(1), "2");
    }

    #[test]
    fn test_csv_parser_escaped_quote() {
        let data = "q\n\"he said \"\"hi\"\"\"\n";
        let parser = CsvParser::new(data, ',');
        let records = parser.parse_all().unwrap();
        assert_eq!(records[1].get(0), "he said \"hi\"");
    }

    #[test]
    fn test_csv_parser_backslash_escape() {
        let data = "s\n\"line1\\nline2\"\n";
        let parser = CsvParser::new(data, ',');
        let records = parser.parse_all().unwrap();
        assert_eq!(records[1].get(0), "line1\nline2");
    }

    #[test]
    fn test_csv_parser_comment_skip() {
        let data = "# header comment\nx,y\n# data comment\n1,2\n";
        let parser = CsvParser::new(data, ',').with_comment_prefix('#');
        let records = parser.parse_all().unwrap();
        assert_eq!(records.len(), 2);
        assert_eq!(records[0].get(0), "x");
    }

    #[test]
    fn test_csv_parser_empty_fields() {
        let data = "a,b,c\n1,,3\n";
        let parser = CsvParser::new(data, ',');
        let records = parser.parse_all().unwrap();
        assert_eq!(records[1].get(1), "");
        assert_eq!(records[1].get(2), "3");
    }

    // ---- CsvTable ----

    #[test]
    fn test_csv_table_from_str() {
        let data = "x,y\n1,2\n3,4\n";
        let table = CsvTable::from_str(data, ',').unwrap();
        assert_eq!(table.row_count(), 2);
        assert_eq!(table.col_count(), 2);
    }

    #[test]
    fn test_csv_table_column_f64() {
        let data = "x,y\n1.5,2.5\n3.5,4.5\n";
        let table = CsvTable::from_str(data, ',').unwrap();
        let x = table.column_f64("x").unwrap();
        assert!((x[0] - 1.5).abs() < 1e-10);
        assert!((x[1] - 3.5).abs() < 1e-10);
    }

    #[test]
    fn test_csv_table_to_csv_string() {
        let data = "a,b\n1,2\n3,4\n";
        let table = CsvTable::from_str(data, ',').unwrap();
        let out = table.to_csv_string(',');
        assert!(out.starts_with("a,b\n"));
        assert!(out.contains("1,2"));
    }

    #[test]
    fn test_csv_table_quote_field_with_delimiter() {
        let data = "name,val\n\"x,y\",5\n";
        let table = CsvTable::from_str(data, ',').unwrap();
        let names = table.column_values("name").unwrap();
        assert_eq!(names[0], "x,y");
    }

    // ---- ConfigurableCsvWriter ----

    #[test]
    fn test_configurable_writer_semicolon() {
        let cfg = CsvWriterConfig {
            delimiter: ';',
            precision: 2,
            ..Default::default()
        };
        let mut w = ConfigurableCsvWriter::new(cfg);
        w.write_header(&["x", "y"]);
        w.write_f64_row(&[1.0, 2.5]);
        let out = w.finish();
        assert!(out.starts_with("x;y\n"));
        assert!(out.contains("1.00;2.50"));
    }

    #[test]
    fn test_configurable_writer_quote_all() {
        let cfg = CsvWriterConfig {
            quote_all: true,
            ..Default::default()
        };
        let mut w = ConfigurableCsvWriter::new(cfg);
        w.write_str_row(&["hello", "world"]);
        let out = w.finish();
        assert!(out.contains("\"hello\""));
        assert!(out.contains("\"world\""));
    }

    #[test]
    fn test_configurable_writer_crlf() {
        let cfg = CsvWriterConfig {
            line_ending: "\r\n".to_string(),
            ..Default::default()
        };
        let mut w = ConfigurableCsvWriter::new(cfg);
        w.write_header(&["v"]);
        let out = w.finish();
        assert!(out.ends_with("\r\n"));
    }

    // ---- column type inference ----

    #[test]
    fn test_infer_integer() {
        assert_eq!(infer_column_type(&["1", "2", "3"]), ColumnType::Integer);
        assert_eq!(infer_column_type(&["-5", "0", "100"]), ColumnType::Integer);
    }

    #[test]
    fn test_infer_float() {
        assert_eq!(
            infer_column_type(&["1.0", "2.5", "3.14"]),
            ColumnType::Float
        );
        assert_eq!(infer_column_type(&["1e10", "-0.5"]), ColumnType::Float);
    }

    #[test]
    fn test_infer_bool() {
        assert_eq!(infer_column_type(&["true", "false"]), ColumnType::Boolean);
        assert_eq!(infer_column_type(&["yes", "no"]), ColumnType::Boolean);
        // The integer check runs before the boolean check.
        assert_eq!(infer_column_type(&["1", "0"]), ColumnType::Integer);
    }

    #[test]
    fn test_infer_text() {
        assert_eq!(infer_column_type(&["Alice", "Bob"]), ColumnType::Text);
        assert_eq!(infer_column_type(&["1.0", "abc"]), ColumnType::Text);
    }

    #[test]
    fn test_infer_empty() {
        assert_eq!(infer_column_type(&["", ""]), ColumnType::Empty);
        assert_eq!(infer_column_type(&[]), ColumnType::Empty);
    }

    #[test]
    fn test_infer_with_missing() {
        assert_eq!(infer_column_type(&["1.5", "", "3.5"]), ColumnType::Float);
    }

    // ---- TypedCsvReader ----

    #[test]
    fn test_typed_reader_i64() {
        let data = "id,val\n1,10\n2,20\n";
        let reader = TypedCsvReader::from_str(data).unwrap();
        let ids = reader.column_as_i64("id").unwrap();
        assert_eq!(ids, vec![1, 2]);
    }

    #[test]
    fn test_typed_reader_f64() {
        let data = "x\n1.5\n2.5\n";
        let reader = TypedCsvReader::from_str(data).unwrap();
        let x = reader.column_as_f64("x").unwrap();
        assert!((x[0] - 1.5).abs() < 1e-10);
    }

    #[test]
    fn test_typed_reader_bool() {
        let data = "active\ntrue\nfalse\nyes\nno\n";
        let reader = TypedCsvReader::from_str(data).unwrap();
        let active = reader.column_as_bool("active").unwrap();
        assert_eq!(active, vec![true, false, true, false]);
    }

    #[test]
    fn test_typed_reader_column_type() {
        let data = "id,name,score\n1,Alice,9.5\n2,Bob,8.0\n";
        let reader = TypedCsvReader::from_str(data).unwrap();
        assert_eq!(reader.column_type("id").unwrap(), ColumnType::Integer);
        assert_eq!(reader.column_type("name").unwrap(), ColumnType::Text);
        assert_eq!(reader.column_type("score").unwrap(), ColumnType::Float);
    }

    #[test]
    fn test_typed_reader_headers_and_count() {
        let data = "a,b\n1,2\n3,4\n5,6\n";
        let reader = TypedCsvReader::from_str(data).unwrap();
        assert_eq!(reader.row_count(), 3);
        assert_eq!(reader.headers(), &["a", "b"]);
    }

    // ---- csv_merge ----

    #[test]
    fn test_csv_merge_basic() {
        let a = CsvTable::from_str("x,y\n1,2\n", ',').unwrap();
        let b = CsvTable::from_str("x,y\n3,4\n", ',').unwrap();
        let merged = csv_merge(&a, &b).unwrap();
        assert_eq!(merged.row_count(), 2);
        let x = merged.column_f64("x").unwrap();
        assert!((x[0] - 1.0).abs() < 1e-10);
        assert!((x[1] - 3.0).abs() < 1e-10);
    }

    #[test]
    fn test_csv_merge_header_mismatch() {
        let a = CsvTable::from_str("x,y\n1,2\n", ',').unwrap();
        let b = CsvTable::from_str("x,z\n3,4\n", ',').unwrap();
        assert!(csv_merge(&a, &b).is_err());
    }

    #[test]
    fn test_csv_merge_empty_right() {
        let a = CsvTable::from_str("x\n1\n2\n", ',').unwrap();
        let b = CsvTable::new(vec!["x".to_string()]);
        let merged = csv_merge(&a, &b).unwrap();
        assert_eq!(merged.row_count(), 2);
    }

    // ---- csv_join ----

    #[test]
    fn test_csv_join_basic() {
        let left = CsvTable::from_str("id,x\n1,10\n2,20\n3,30\n", ',').unwrap();
        let right = CsvTable::from_str("id,y\n1,100\n3,300\n", ',').unwrap();
        let joined = csv_join(&left, &right, "id").unwrap();
        assert_eq!(joined.row_count(), 2);
        let y = joined.column_f64("y").unwrap();
        assert!((y[0] - 100.0).abs() < 1e-10);
        assert!((y[1] - 300.0).abs() < 1e-10);
    }

    #[test]
    fn test_csv_join_no_matches() {
        let left = CsvTable::from_str("id,x\n1,10\n", ',').unwrap();
        let right = CsvTable::from_str("id,y\n9,99\n", ',').unwrap();
        let joined = csv_join(&left, &right, "id").unwrap();
        assert_eq!(joined.row_count(), 0);
    }

    #[test]
    fn test_csv_join_key_not_found() {
        let left = CsvTable::from_str("id,x\n1,10\n", ',').unwrap();
        let right = CsvTable::from_str("id,y\n1,100\n", ',').unwrap();
        assert!(csv_join(&left, &right, "missing").is_err());
    }

    // ---- csv_pivot ----

    #[test]
    fn test_csv_pivot_sum() {
        let data = "region,product,sales\nNorth,A,10\nNorth,B,20\nSouth,A,30\nSouth,B,40\n";
        let table = CsvTable::from_str(data, ',').unwrap();
        let pivot = csv_pivot(&table, "region", "product", "sales", PivotAgg::Sum).unwrap();
        assert_eq!(pivot.row_count(), 2);
        assert!(pivot.headers.contains(&"A".to_string()));
        assert!(pivot.headers.contains(&"B".to_string()));
    }

    #[test]
    fn test_csv_pivot_count() {
        let data = "cat,sub,v\nA,X,1\nA,X,2\nA,Y,3\nB,X,4\n";
        let table = CsvTable::from_str(data, ',').unwrap();
        let pivot = csv_pivot(&table, "cat", "sub", "v", PivotAgg::Count).unwrap();
        let x_col = pivot.column_values("X").unwrap();
        assert_eq!(x_col[0], "2");
    }

    #[test]
    fn test_csv_pivot_mean() {
        let data = "g,c,v\nA,X,10\nA,X,20\n";
        let table = CsvTable::from_str(data, ',').unwrap();
        let pivot = csv_pivot(&table, "g", "c", "v", PivotAgg::Mean).unwrap();
        let x_col = pivot.column_values("X").unwrap();
        let mean: f64 = x_col[0].parse().unwrap();
        assert!((mean - 15.0).abs() < 1e-10);
    }

    // ---- csv_diff ----

    #[test]
    fn test_csv_diff_basic() {
        let a = CsvTable::from_str("id,v\n1,10\n2,20\n3,30\n", ',').unwrap();
        let b = CsvTable::from_str("id,v\n1,10\n2,99\n4,40\n", ',').unwrap();
        let diff = csv_diff(&a, &b, "id").unwrap();
        assert_eq!(diff.removed.len(), 1);
        assert_eq!(diff.added.len(), 1);
        assert_eq!(diff.changed.len(), 1);
        assert_eq!(diff.changed[0].key, "2");
    }

    #[test]
    fn test_csv_diff_no_changes() {
        let a = CsvTable::from_str("id,v\n1,10\n2,20\n", ',').unwrap();
        let b = CsvTable::from_str("id,v\n1,10\n2,20\n", ',').unwrap();
        let diff = csv_diff(&a, &b, "id").unwrap();
        assert_eq!(diff.removed.len(), 0);
        assert_eq!(diff.added.len(), 0);
        assert_eq!(diff.changed.len(), 0);
    }

    #[test]
    fn test_csv_diff_header_mismatch() {
        let a = CsvTable::from_str("id,v\n1,10\n", ',').unwrap();
        let b = CsvTable::from_str("id,w\n1,10\n", ',').unwrap();
        assert!(csv_diff(&a, &b, "id").is_err());
    }

    #[test]
    fn test_csv_diff_all_added() {
        let a = CsvTable::new(vec!["id".to_string(), "v".to_string()]);
        let b = CsvTable::from_str("id,v\n1,10\n", ',').unwrap();
        let diff = csv_diff(&a, &b, "id").unwrap();
        assert_eq!(diff.added.len(), 1);
        assert_eq!(diff.removed.len(), 0);
    }

    // ---- CsvStreamParser ----

    #[test]
    fn test_csv_stream_parser_file() {
        let path_buf = temp_csv_path("oxiphy_stream_test.csv");
        let path = path_buf.as_str();
        {
            let mut f = File::create(path).unwrap();
            writeln!(f, "x,y,z").unwrap();
            writeln!(f, "1,2,3").unwrap();
            writeln!(f, "4,5,6").unwrap();
            writeln!(f, "7,8,9").unwrap();
        }
        let mut parser = CsvStreamParser::open(path, ',').unwrap();
        assert_eq!(parser.headers(), &["x", "y", "z"]);
        let mut count = 0;
        while let Some(rec) = parser.next_record().unwrap() {
            count += 1;
            assert_eq!(rec.len(), 3);
        }
        assert_eq!(count, 3);
        std::fs::remove_file(path).ok();
    }

    #[test]
    fn test_csv_stream_parser_large() {
        let path_buf = temp_csv_path("oxiphy_stream_large.csv");
        let path = path_buf.as_str();
        let nrows = 500_usize;
        {
            let mut f = File::create(path).unwrap();
            writeln!(f, "i,v").unwrap();
            for i in 0..nrows {
                writeln!(f, "{},{}", i, i as f64 * 1.5).unwrap();
            }
        }
        let mut parser = CsvStreamParser::open(path, ',').unwrap();
        let mut count = 0usize;
        while let Some(_rec) = parser.next_record().unwrap() {
            count += 1;
        }
        assert_eq!(count, nrows);
        std::fs::remove_file(path).ok();
    }

    // ---- CsvRecord and field quoting ----

    #[test]
    fn test_csv_record_accessors() {
        let rec = CsvRecord {
            fields: vec!["a".to_string(), "b".to_string(), "c".to_string()],
        };
        assert_eq!(rec.len(), 3);
        assert!(!rec.is_empty());
        assert_eq!(rec.get(0), "a");
        // Out-of-range access yields an empty string rather than panicking.
        assert_eq!(rec.get(10), "");
    }

    #[test]
    fn test_quote_field_no_quote_needed() {
        assert_eq!(quote_field("hello", ','), "hello");
    }

    #[test]
    fn test_quote_field_with_delimiter() {
        assert_eq!(quote_field("a,b", ','), "\"a,b\"");
    }

    #[test]
    fn test_quote_field_with_embedded_quote() {
        assert_eq!(quote_field("say \"hi\"", ','), "\"say \"\"hi\"\"\"");
    }

    #[test]
    fn test_infer_table_types_mixed() {
        let data = "id,name,score\n1,Alice,9.5\n2,Bob,8.0\n";
        let table = CsvTable::from_str(data, ',').unwrap();
        let types = infer_table_types(&table);
        assert_eq!(types[0].1, ColumnType::Integer);
        assert_eq!(types[1].1, ColumnType::Text);
        assert_eq!(types[2].1, ColumnType::Float);
    }
}