1use std::fmt;
34use std::fs;
35use std::io;
36
/// Errors produced while reading, parsing, or writing CSV data.
///
/// The type is cheap to clone and can be compared with `==`, which makes it
/// convenient to assert on specific failures in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CsvError {
    /// An underlying I/O operation failed. Holds the rendered message of the
    /// original `io::Error` (the error value itself is not retained).
    IoError(String),
    /// The input text is not well-formed CSV.
    ParseError {
        /// Line number (counted from 1) that the parser associates with the failure.
        line: usize,
        /// Human-readable description of what went wrong.
        message: String,
    },
}

impl fmt::Display for CsvError {
    /// Renders a single-line, user-facing description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::IoError(msg) => write!(f, "I/O error: {msg}"),
            Self::ParseError { line, message } => {
                write!(f, "parse error at line {line}: {message}")
            }
        }
    }
}

// No `source()` override: the I/O variant stores only the message text, so
// there is no underlying error value to expose.
impl std::error::Error for CsvError {}

impl From<io::Error> for CsvError {
    /// Converts an I/O failure into [`CsvError::IoError`], which is what lets
    /// `?` be applied directly to `std::fs` calls in this module.
    fn from(e: io::Error) -> Self {
        Self::IoError(e.to_string())
    }
}
69
70fn parse_csv(input: &str, delimiter: u8) -> Result<Vec<Vec<String>>, CsvError> {
74 let delim = delimiter as char;
75 let mut rows: Vec<Vec<String>> = Vec::new();
76 let mut current_row: Vec<String> = Vec::new();
77 let mut field = String::new();
78 let mut in_quotes = false;
79 let mut chars = input.chars().peekable();
80 let mut logical_line: usize = 1;
81
82 while let Some(c) = chars.next() {
83 if in_quotes {
84 if c == '"' {
85 if chars.peek() == Some(&'"') {
87 chars.next();
88 field.push('"');
89 } else {
90 in_quotes = false;
92 }
93 } else {
94 if c == '\n' {
95 logical_line += 1;
96 }
97 field.push(c);
98 }
99 } else if c == '"' {
100 if field.is_empty() {
101 in_quotes = true;
103 } else {
104 field.push(c);
106 }
107 } else if c == delim {
108 current_row.push(std::mem::take(&mut field));
109 } else if c == '\n' {
110 current_row.push(std::mem::take(&mut field));
111 rows.push(std::mem::take(&mut current_row));
112 logical_line += 1;
113 } else if c == '\r' {
114 if chars.peek() == Some(&'\n') {
116 } else {
118 current_row.push(std::mem::take(&mut field));
120 rows.push(std::mem::take(&mut current_row));
121 logical_line += 1;
122 }
123 } else {
124 field.push(c);
125 }
126 }
127
128 if in_quotes {
129 return Err(CsvError::ParseError {
130 line: logical_line,
131 message: "unterminated quoted field".to_string(),
132 });
133 }
134
135 if !field.is_empty() || !current_row.is_empty() {
137 current_row.push(field);
138 rows.push(current_row);
139 }
140
141 Ok(rows)
142}
143
144fn detect_delimiter(input: &str) -> u8 {
149 let candidates: &[u8] = b",;\t|";
150 let mut best = b',';
151 let mut best_score: usize = 0;
152
153 for &delim in candidates {
154 if let Ok(rows) = parse_csv(input, delim) {
155 if rows.is_empty() {
156 continue;
157 }
158 let first_len = rows[0].len();
159 if first_len <= 1 {
160 continue;
161 }
162 let consistent = rows.iter().filter(|r| r.len() == first_len).count();
164 let score = consistent * first_len;
165 if score > best_score {
166 best_score = score;
167 best = delim;
168 }
169 }
170 }
171
172 best
173}
174
175pub struct CsvReader {
187 header_row: Option<Vec<String>>,
188 data_rows: Vec<Vec<String>>,
189 raw: String,
190 has_headers: bool,
191}
192
193impl CsvReader {
194 pub fn parse(data: &str) -> Self {
200 let delim = detect_delimiter(data);
201 let rows = parse_csv(data, delim).unwrap_or_default();
202 let mut reader = Self::build(rows, true);
203 reader.raw = data.to_string();
204 reader
205 }
206
207 pub fn from_path(path: &str) -> Result<Self, CsvError> {
211 let data = fs::read_to_string(path)?;
212 let delim = detect_delimiter(&data);
213 let rows = parse_csv(&data, delim)?;
214 let mut reader = Self::build(rows, true);
215 reader.raw = data;
216 Ok(reader)
217 }
218
219 #[must_use]
223 pub fn delimiter(self, d: u8) -> Self {
224 let rows = parse_csv(&self.raw, d).unwrap_or_default();
225 let mut reader = Self::build(rows, self.has_headers);
226 reader.raw = self.raw;
227 reader
228 }
229
230 #[must_use]
235 pub fn has_headers(self, b: bool) -> Self {
236 let all = self.combined_raw();
237 let mut reader = Self::build(all, b);
238 reader.raw = self.raw;
239 reader
240 }
241
242 pub fn headers(&self) -> Option<&[String]> {
244 self.header_row.as_deref()
245 }
246
247 pub fn rows(&self) -> &[Vec<String>] {
249 &self.data_rows
250 }
251
252 pub fn column(&self, name: &str) -> Option<Vec<&str>> {
256 let idx = self.col_index(name)?;
257 Some(
258 self.data_rows
259 .iter()
260 .filter_map(|row| row.get(idx).map(|s| s.as_str()))
261 .collect(),
262 )
263 }
264
265 pub fn get(&self, row: usize, col: &str) -> Option<&str> {
270 let idx = self.col_index(col)?;
271 self.data_rows.get(row)?.get(idx).map(|s| s.as_str())
272 }
273
274 fn col_index(&self, name: &str) -> Option<usize> {
275 self.header_row
276 .as_ref()?
277 .iter()
278 .position(|h| h == name)
279 }
280
281 fn build(mut rows: Vec<Vec<String>>, has_headers: bool) -> Self {
282 if has_headers && !rows.is_empty() {
283 let header_row = rows.remove(0);
284 Self {
285 header_row: Some(header_row),
286 data_rows: rows,
287 raw: String::new(),
288 has_headers,
289 }
290 } else {
291 Self {
292 header_row: None,
293 data_rows: rows,
294 raw: String::new(),
295 has_headers,
296 }
297 }
298 }
299
300 fn combined_raw(&self) -> Vec<Vec<String>> {
301 let mut all = Vec::new();
302 if let Some(h) = &self.header_row {
303 all.push(h.clone());
304 }
305 all.extend(self.data_rows.clone());
306 all
307 }
308}
309
310
311pub struct CsvWriter {
326 delim: u8,
327 header_row: Option<Vec<String>>,
328 data_rows: Vec<Vec<String>>,
329}
330
331impl CsvWriter {
332 pub fn new() -> Self {
334 Self {
335 delim: b',',
336 header_row: None,
337 data_rows: Vec::new(),
338 }
339 }
340
341 #[must_use]
343 pub fn delimiter(mut self, d: u8) -> Self {
344 self.delim = d;
345 self
346 }
347
348 #[must_use]
350 pub fn headers(mut self, headers: &[&str]) -> Self {
351 self.header_row = Some(headers.iter().map(|s| s.to_string()).collect());
352 self
353 }
354
355 #[must_use]
357 pub fn row(mut self, values: &[&str]) -> Self {
358 self.data_rows.push(values.iter().map(|s| s.to_string()).collect());
359 self
360 }
361
362 pub fn render(&self) -> String {
364 let delim_char = self.delim as char;
365 let mut out = String::new();
366
367 if let Some(h) = &self.header_row {
368 self.write_row(&mut out, h, delim_char);
369 }
370
371 for row in &self.data_rows {
372 self.write_row(&mut out, row, delim_char);
373 }
374
375 out
376 }
377
378 pub fn to_file(&self, path: &str) -> Result<(), CsvError> {
380 let content = self.render();
381 fs::write(path, &content)?;
382 Ok(())
383 }
384
385 fn write_row(&self, out: &mut String, row: &[String], delim_char: char) {
386 for (i, field) in row.iter().enumerate() {
387 if i > 0 {
388 out.push(delim_char);
389 }
390 self.write_field(out, field, delim_char);
391 }
392 out.push('\n');
393 }
394
395 fn write_field(&self, out: &mut String, field: &str, delim_char: char) {
396 let needs_quoting = field.contains(delim_char)
397 || field.contains('"')
398 || field.contains('\n')
399 || field.contains('\r');
400
401 if needs_quoting {
402 out.push('"');
403 for c in field.chars() {
404 if c == '"' {
405 out.push_str("\"\"");
406 } else {
407 out.push(c);
408 }
409 }
410 out.push('"');
411 } else {
412 out.push_str(field);
413 }
414 }
415}
416
417impl Default for CsvWriter {
418 fn default() -> Self {
419 Self::new()
420 }
421}
422
/// Unit tests covering parsing, delimiter detection, the reader accessors,
/// the writer, and file round-trips.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_simple_csv() {
        let reader = CsvReader::parse("a,b,c\n1,2,3\n4,5,6");
        assert_eq!(reader.rows().len(), 2);
        assert_eq!(reader.rows()[0], vec!["1", "2", "3"]);
        assert_eq!(reader.rows()[1], vec!["4", "5", "6"]);
    }

    #[test]
    fn parse_with_headers_access_by_column() {
        let reader = CsvReader::parse("name,age\nAlice,30\nBob,25");
        assert_eq!(
            reader.headers(),
            Some(vec!["name".to_string(), "age".to_string()].as_slice())
        );
        assert_eq!(reader.column("name"), Some(vec!["Alice", "Bob"]));
        assert_eq!(reader.column("age"), Some(vec!["30", "25"]));
        assert_eq!(reader.column("missing"), None);
    }

    #[test]
    fn quoted_fields_with_commas() {
        let reader = CsvReader::parse(
            "name,address\nAlice,\"123 Main St, Apt 4\"\nBob,\"456 Oak Ave, Suite 5\"",
        );
        assert_eq!(reader.get(0, "address"), Some("123 Main St, Apt 4"));
        assert_eq!(reader.get(1, "address"), Some("456 Oak Ave, Suite 5"));
    }

    #[test]
    fn quoted_fields_with_embedded_quotes() {
        let reader = CsvReader::parse(
            "name,quote\nAlice,\"She said \"\"hello\"\"\"\nBob,\"He said \"\"bye\"\"\"",
        );
        assert_eq!(reader.get(0, "quote"), Some("She said \"hello\""));
        assert_eq!(reader.get(1, "quote"), Some("He said \"bye\""));
    }

    #[test]
    fn quoted_fields_with_newlines() {
        let data = "name,bio\nAlice,\"Line 1\nLine 2\"\nBob,\"One line\"";
        let reader = CsvReader::parse(data);
        assert_eq!(reader.get(0, "bio"), Some("Line 1\nLine 2"));
        assert_eq!(reader.get(1, "bio"), Some("One line"));
        assert_eq!(reader.rows().len(), 2);
    }

    #[test]
    fn custom_delimiter_semicolon() {
        let data = "name;age\nAlice;30\nBob;25";
        let reader = CsvReader::parse(data).delimiter(b';');
        assert_eq!(reader.get(0, "name"), Some("Alice"));
        assert_eq!(reader.get(1, "age"), Some("25"));
    }

    #[test]
    fn custom_delimiter_tab() {
        let data = "name\tage\nAlice\t30\nBob\t25";
        let reader = CsvReader::parse(data).delimiter(b'\t');
        assert_eq!(reader.get(0, "name"), Some("Alice"));
        assert_eq!(reader.get(1, "age"), Some("25"));
    }

    #[test]
    fn delimiter_auto_detection_semicolon() {
        let data = "name;age;city\nAlice;30;NYC\nBob;25;LA";
        let reader = CsvReader::parse(data);
        assert_eq!(reader.headers().map(|h| h.len()), Some(3));
        assert_eq!(reader.get(0, "name"), Some("Alice"));
        assert_eq!(reader.get(0, "city"), Some("NYC"));
    }

    #[test]
    fn delimiter_auto_detection_tab() {
        let data = "name\tage\tcolor\nAlice\t30\tred\nBob\t25\tblue";
        let reader = CsvReader::parse(data);
        assert_eq!(reader.get(0, "age"), Some("30"));
        assert_eq!(reader.get(1, "color"), Some("blue"));
    }

    #[test]
    fn writer_basic_output() {
        let csv = CsvWriter::new()
            .headers(&["name", "score"])
            .row(&["Alice", "95"])
            .row(&["Bob", "87"])
            .render();
        assert_eq!(csv, "name,score\nAlice,95\nBob,87\n");
    }

    #[test]
    fn writer_quotes_fields_that_need_it() {
        let csv = CsvWriter::new()
            .headers(&["name", "address"])
            .row(&["Alice", "123 Main, Apt 4"])
            .row(&["Bob", "said \"hi\""])
            .render();
        assert_eq!(
            csv,
            "name,address\nAlice,\"123 Main, Apt 4\"\nBob,\"said \"\"hi\"\"\"\n"
        );
    }

    #[test]
    fn writer_quotes_fields_with_newlines() {
        let csv = CsvWriter::new()
            .headers(&["k", "v"])
            .row(&["a", "line1\nline2"])
            .render();
        assert_eq!(csv, "k,v\na,\"line1\nline2\"\n");
    }

    #[test]
    fn round_trip() {
        let original = CsvWriter::new()
            .headers(&["name", "value", "note"])
            .row(&["Alice", "42", "first entry"])
            .row(&["Bob", "99", "has, comma"])
            .row(&["Eve", "0", "said \"hi\""])
            .render();

        let reader = CsvReader::parse(&original);
        assert_eq!(
            reader.headers(),
            Some(vec!["name".to_string(), "value".to_string(), "note".to_string()].as_slice())
        );
        assert_eq!(reader.get(0, "name"), Some("Alice"));
        assert_eq!(reader.get(1, "note"), Some("has, comma"));
        assert_eq!(reader.get(2, "note"), Some("said \"hi\""));
    }

    #[test]
    fn empty_fields() {
        let reader = CsvReader::parse("a,b,c\n,,\n1,,3");
        assert_eq!(reader.rows()[0], vec!["", "", ""]);
        assert_eq!(reader.rows()[1], vec!["1", "", "3"]);
    }

    #[test]
    fn get_and_column_accessors() {
        let reader = CsvReader::parse("x,y,z\n1,2,3\n4,5,6\n7,8,9");
        assert_eq!(reader.get(0, "x"), Some("1"));
        assert_eq!(reader.get(2, "z"), Some("9"));
        assert_eq!(reader.get(5, "x"), None);
        assert_eq!(reader.column("y"), Some(vec!["2", "5", "8"]));
    }

    #[test]
    fn has_headers_false() {
        let reader = CsvReader::parse("1,2,3\n4,5,6").has_headers(false);
        assert_eq!(reader.headers(), None);
        assert_eq!(reader.rows().len(), 2);
        assert_eq!(reader.rows()[0], vec!["1", "2", "3"]);
    }

    #[test]
    fn file_read_write() {
        // Include the process id in the file name so concurrent runs of this
        // test suite do not overwrite each other's file.
        let path = std::env::temp_dir()
            .join(format!("csv_toolkit_test_{}.csv", std::process::id()));
        let path_str = path.to_str().expect("temp path should be valid UTF-8");

        let written = CsvWriter::new()
            .headers(&["a", "b"])
            .row(&["1", "2"])
            .row(&["3", "4"])
            .to_file(path_str);
        let read_back = written.and_then(|()| CsvReader::from_path(path_str));

        // Clean up before asserting so a failure does not leave the file behind.
        let _ = std::fs::remove_file(&path);

        let reader = read_back.unwrap();
        assert_eq!(reader.headers().map(|h| h.len()), Some(2));
        assert_eq!(reader.get(0, "a"), Some("1"));
        assert_eq!(reader.get(1, "b"), Some("4"));
    }

    #[test]
    fn file_read_nonexistent() {
        let result = CsvReader::from_path("/nonexistent/path/file.csv");
        assert!(result.is_err());
    }

    #[test]
    fn writer_custom_delimiter() {
        let csv = CsvWriter::new()
            .delimiter(b';')
            .headers(&["a", "b"])
            .row(&["1", "2"])
            .render();
        assert_eq!(csv, "a;b\n1;2\n");
    }

    #[test]
    fn writer_no_headers() {
        let csv = CsvWriter::new()
            .row(&["1", "2"])
            .row(&["3", "4"])
            .render();
        assert_eq!(csv, "1,2\n3,4\n");
    }

    #[test]
    fn trailing_newline_optional() {
        let r1 = CsvReader::parse("a,b\n1,2\n");
        assert_eq!(r1.rows().len(), 1);

        let r2 = CsvReader::parse("a,b\n1,2");
        assert_eq!(r2.rows().len(), 1);

        assert_eq!(r1.rows(), r2.rows());
    }

    #[test]
    fn crlf_line_endings() {
        let reader = CsvReader::parse("a,b\r\n1,2\r\n3,4\r\n");
        assert_eq!(reader.rows().len(), 2);
        assert_eq!(reader.get(0, "a"), Some("1"));
        assert_eq!(reader.get(1, "b"), Some("4"));
    }

    #[test]
    fn single_column() {
        let reader = CsvReader::parse("name\nAlice\nBob");
        assert_eq!(reader.column("name"), Some(vec!["Alice", "Bob"]));
    }

    #[test]
    fn unterminated_quote_is_error() {
        let result = parse_csv("a,\"unclosed\n", b',');
        assert!(result.is_err());
    }
}