Skip to main content

philiprehberger_csv_toolkit/
lib.rs

//! High-level CSV reading, writing, and manipulation.
//!
//! Fully RFC 4180 compliant with zero external dependencies. Supports quoted fields,
//! escaped quotes (doubled `""`), newlines within quoted fields, custom delimiters,
//! and automatic delimiter detection.
//!
//! # Reading CSV
//!
//! ```
//! use philiprehberger_csv_toolkit::CsvReader;
//!
//! let data = "name,age,city\nAlice,30,NYC\nBob,25,LA";
//! let reader = CsvReader::parse(data);
//!
//! assert_eq!(reader.get(0, "name"), Some("Alice"));
//! assert_eq!(reader.column("age"), Some(vec!["30", "25"]));
//! ```
//!
//! # Writing CSV
//!
//! ```
//! use philiprehberger_csv_toolkit::CsvWriter;
//!
//! let output = CsvWriter::new()
//!     .headers(&["name", "score"])
//!     .row(&["Alice", "95"])
//!     .row(&["Bob", "87"])
//!     .render();
//!
//! assert_eq!(output, "name,score\nAlice,95\nBob,87\n");
//! ```
32
33use std::fmt;
34use std::fs;
35use std::io;
36
/// Errors that can occur during CSV operations.
///
/// Both variants carry only owned strings and integers, so the type can be
/// cloned and compared for equality (useful in tests and assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CsvError {
    /// An I/O error occurred (e.g., file not found).
    ///
    /// Holds the rendered message of the underlying [`io::Error`].
    IoError(String),
    /// A parsing error occurred at a specific line.
    ParseError {
        /// The 1-based line number where the error occurred.
        line: usize,
        /// A description of the error.
        message: String,
    },
}

impl fmt::Display for CsvError {
    /// Formats the error as `I/O error: <msg>` or
    /// `parse error at line <line>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::IoError(msg) => write!(f, "I/O error: {msg}"),
            Self::ParseError { line, message } => {
                write!(f, "parse error at line {line}: {message}")
            }
        }
    }
}

impl std::error::Error for CsvError {}

impl From<io::Error> for CsvError {
    /// Converts an [`io::Error`] into [`CsvError::IoError`], keeping only its
    /// display text so that `?` can be used on I/O calls.
    fn from(e: io::Error) -> Self {
        Self::IoError(e.to_string())
    }
}
69
70/// Parse RFC 4180 CSV data into rows of fields.
71///
72/// Handles quoted fields containing delimiters, newlines, and escaped quotes (`""`).
73fn parse_csv(input: &str, delimiter: u8) -> Result<Vec<Vec<String>>, CsvError> {
74    let delim = delimiter as char;
75    let mut rows: Vec<Vec<String>> = Vec::new();
76    let mut current_row: Vec<String> = Vec::new();
77    let mut field = String::new();
78    let mut in_quotes = false;
79    let mut chars = input.chars().peekable();
80    let mut logical_line: usize = 1;
81
82    while let Some(c) = chars.next() {
83        if in_quotes {
84            if c == '"' {
85                // Check for escaped quote ""
86                if chars.peek() == Some(&'"') {
87                    chars.next();
88                    field.push('"');
89                } else {
90                    // End of quoted field
91                    in_quotes = false;
92                }
93            } else {
94                if c == '\n' {
95                    logical_line += 1;
96                }
97                field.push(c);
98            }
99        } else if c == '"' {
100            if field.is_empty() {
101                // Start of quoted field
102                in_quotes = true;
103            } else {
104                // Quote in middle of unquoted field — be lenient, just include it
105                field.push(c);
106            }
107        } else if c == delim {
108            current_row.push(std::mem::take(&mut field));
109        } else if c == '\n' {
110            current_row.push(std::mem::take(&mut field));
111            rows.push(std::mem::take(&mut current_row));
112            logical_line += 1;
113        } else if c == '\r' {
114            // Skip \r, handle \r\n
115            if chars.peek() == Some(&'\n') {
116                // Will be handled by the \n branch on next iteration
117            } else {
118                // Bare \r acts as line ending
119                current_row.push(std::mem::take(&mut field));
120                rows.push(std::mem::take(&mut current_row));
121                logical_line += 1;
122            }
123        } else {
124            field.push(c);
125        }
126    }
127
128    if in_quotes {
129        return Err(CsvError::ParseError {
130            line: logical_line,
131            message: "unterminated quoted field".to_string(),
132        });
133    }
134
135    // Handle last field/row (if file doesn't end with newline)
136    if !field.is_empty() || !current_row.is_empty() {
137        current_row.push(field);
138        rows.push(current_row);
139    }
140
141    Ok(rows)
142}
143
144/// Detect the most likely delimiter from a set of candidates.
145///
146/// Tries `,`, `;`, `\t`, and `|`. Picks the delimiter that produces the most
147/// consistent (non-zero) column count across all rows.
148fn detect_delimiter(input: &str) -> u8 {
149    let candidates: &[u8] = b",;\t|";
150    let mut best = b',';
151    let mut best_score: usize = 0;
152
153    for &delim in candidates {
154        if let Ok(rows) = parse_csv(input, delim) {
155            if rows.is_empty() {
156                continue;
157            }
158            let first_len = rows[0].len();
159            if first_len <= 1 {
160                continue;
161            }
162            // Score = number of rows with the same column count as the first row
163            let consistent = rows.iter().filter(|r| r.len() == first_len).count();
164            let score = consistent * first_len;
165            if score > best_score {
166                best_score = score;
167                best = delim;
168            }
169        }
170    }
171
172    best
173}
174
/// A CSV reader that parses CSV data and provides access to headers, rows, and cells.
///
/// # Example
///
/// ```
/// use philiprehberger_csv_toolkit::CsvReader;
///
/// let reader = CsvReader::parse("a,b\n1,2\n3,4");
/// assert_eq!(reader.rows().len(), 2);
/// assert_eq!(reader.get(0, "a"), Some("1"));
/// ```
#[derive(Debug, Clone)]
pub struct CsvReader {
    /// The first parsed row when headers are enabled and the input had at least one row.
    header_row: Option<Vec<String>>,
    /// All parsed rows other than the header row.
    data_rows: Vec<Vec<String>>,
    /// The original input text, retained so the data can be re-parsed with another delimiter.
    raw: String,
    /// Whether the first row is to be treated as the header row.
    has_headers: bool,
}
192
193impl CsvReader {
194    /// Parse CSV from a string.
195    ///
196    /// By default, treats the first row as headers and auto-detects the delimiter.
197    /// Use [`delimiter`](CsvReader::delimiter) and [`has_headers`](CsvReader::has_headers)
198    /// to customize behavior.
199    pub fn parse(data: &str) -> Self {
200        let delim = detect_delimiter(data);
201        let rows = parse_csv(data, delim).unwrap_or_default();
202        let mut reader = Self::build(rows, true);
203        reader.raw = data.to_string();
204        reader
205    }
206
207    /// Read CSV from a file path.
208    ///
209    /// Returns a [`CsvError::IoError`] if the file cannot be read.
210    pub fn from_path(path: &str) -> Result<Self, CsvError> {
211        let data = fs::read_to_string(path)?;
212        let delim = detect_delimiter(&data);
213        let rows = parse_csv(&data, delim)?;
214        let mut reader = Self::build(rows, true);
215        reader.raw = data;
216        Ok(reader)
217    }
218
219    /// Set the field delimiter, re-parsing the data.
220    ///
221    /// This replaces auto-detection with the specified delimiter.
222    #[must_use]
223    pub fn delimiter(self, d: u8) -> Self {
224        let rows = parse_csv(&self.raw, d).unwrap_or_default();
225        let mut reader = Self::build(rows, self.has_headers);
226        reader.raw = self.raw;
227        reader
228    }
229
230    /// Set whether the first row should be treated as headers.
231    ///
232    /// When `true` (default), the first row is accessible via [`headers()`](CsvReader::headers)
233    /// and is excluded from [`rows()`](CsvReader::rows).
234    #[must_use]
235    pub fn has_headers(self, b: bool) -> Self {
236        let all = self.combined_raw();
237        let mut reader = Self::build(all, b);
238        reader.raw = self.raw;
239        reader
240    }
241
242    /// Get the header row, if headers are enabled.
243    pub fn headers(&self) -> Option<&[String]> {
244        self.header_row.as_deref()
245    }
246
247    /// Get all data rows (excluding the header row when headers are enabled).
248    pub fn rows(&self) -> &[Vec<String>] {
249        &self.data_rows
250    }
251
252    /// Get all values for a column by header name.
253    ///
254    /// Returns `None` if headers are not enabled or the column name is not found.
255    pub fn column(&self, name: &str) -> Option<Vec<&str>> {
256        let idx = self.col_index(name)?;
257        Some(
258            self.data_rows
259                .iter()
260                .filter_map(|row| row.get(idx).map(|s| s.as_str()))
261                .collect(),
262        )
263    }
264
265    /// Get a single cell value by row index and column name.
266    ///
267    /// Returns `None` if the row index is out of bounds, headers are not enabled,
268    /// or the column name is not found.
269    pub fn get(&self, row: usize, col: &str) -> Option<&str> {
270        let idx = self.col_index(col)?;
271        self.data_rows.get(row)?.get(idx).map(|s| s.as_str())
272    }
273
274    fn col_index(&self, name: &str) -> Option<usize> {
275        self.header_row
276            .as_ref()?
277            .iter()
278            .position(|h| h == name)
279    }
280
281    fn build(mut rows: Vec<Vec<String>>, has_headers: bool) -> Self {
282        if has_headers && !rows.is_empty() {
283            let header_row = rows.remove(0);
284            Self {
285                header_row: Some(header_row),
286                data_rows: rows,
287                raw: String::new(),
288                has_headers,
289            }
290        } else {
291            Self {
292                header_row: None,
293                data_rows: rows,
294                raw: String::new(),
295                has_headers,
296            }
297        }
298    }
299
300    fn combined_raw(&self) -> Vec<Vec<String>> {
301        let mut all = Vec::new();
302        if let Some(h) = &self.header_row {
303            all.push(h.clone());
304        }
305        all.extend(self.data_rows.clone());
306        all
307    }
308}
309
310
/// A CSV writer that builds CSV output from headers and rows.
///
/// # Example
///
/// ```
/// use philiprehberger_csv_toolkit::CsvWriter;
///
/// let csv = CsvWriter::new()
///     .headers(&["x", "y"])
///     .row(&["1", "2"])
///     .render();
///
/// assert_eq!(csv, "x,y\n1,2\n");
/// ```
#[derive(Debug, Clone)]
pub struct CsvWriter {
    /// Field delimiter byte placed between fields.
    delim: u8,
    /// Optional header row, rendered before the data rows.
    header_row: Option<Vec<String>>,
    /// Data rows in the order they were added.
    data_rows: Vec<Vec<String>>,
}
330
331impl CsvWriter {
332    /// Create a new CSV writer with the default comma delimiter.
333    pub fn new() -> Self {
334        Self {
335            delim: b',',
336            header_row: None,
337            data_rows: Vec::new(),
338        }
339    }
340
341    /// Set the field delimiter (default: `,`).
342    #[must_use]
343    pub fn delimiter(mut self, d: u8) -> Self {
344        self.delim = d;
345        self
346    }
347
348    /// Set the header row.
349    #[must_use]
350    pub fn headers(mut self, headers: &[&str]) -> Self {
351        self.header_row = Some(headers.iter().map(|s| s.to_string()).collect());
352        self
353    }
354
355    /// Add a data row.
356    #[must_use]
357    pub fn row(mut self, values: &[&str]) -> Self {
358        self.data_rows.push(values.iter().map(|s| s.to_string()).collect());
359        self
360    }
361
362    /// Render the CSV data as a string.
363    pub fn render(&self) -> String {
364        let delim_char = self.delim as char;
365        let mut out = String::new();
366
367        if let Some(h) = &self.header_row {
368            self.write_row(&mut out, h, delim_char);
369        }
370
371        for row in &self.data_rows {
372            self.write_row(&mut out, row, delim_char);
373        }
374
375        out
376    }
377
378    /// Write the CSV data to a file.
379    pub fn to_file(&self, path: &str) -> Result<(), CsvError> {
380        let content = self.render();
381        fs::write(path, &content)?;
382        Ok(())
383    }
384
385    fn write_row(&self, out: &mut String, row: &[String], delim_char: char) {
386        for (i, field) in row.iter().enumerate() {
387            if i > 0 {
388                out.push(delim_char);
389            }
390            self.write_field(out, field, delim_char);
391        }
392        out.push('\n');
393    }
394
395    fn write_field(&self, out: &mut String, field: &str, delim_char: char) {
396        let needs_quoting = field.contains(delim_char)
397            || field.contains('"')
398            || field.contains('\n')
399            || field.contains('\r');
400
401        if needs_quoting {
402            out.push('"');
403            for c in field.chars() {
404                if c == '"' {
405                    out.push_str("\"\"");
406                } else {
407                    out.push(c);
408                }
409            }
410            out.push('"');
411        } else {
412            out.push_str(field);
413        }
414    }
415}
416
417impl Default for CsvWriter {
418    fn default() -> Self {
419        Self::new()
420    }
421}
422
/// Unit tests covering parsing, delimiter detection, writing, and file round-trips.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_simple_csv() {
        let reader = CsvReader::parse("a,b,c\n1,2,3\n4,5,6");
        assert_eq!(reader.rows().len(), 2);
        assert_eq!(reader.rows()[0], vec!["1", "2", "3"]);
        assert_eq!(reader.rows()[1], vec!["4", "5", "6"]);
    }

    #[test]
    fn parse_with_headers_access_by_column() {
        let reader = CsvReader::parse("name,age\nAlice,30\nBob,25");
        assert_eq!(
            reader.headers(),
            Some(vec!["name".to_string(), "age".to_string()].as_slice())
        );
        assert_eq!(reader.column("name"), Some(vec!["Alice", "Bob"]));
        assert_eq!(reader.column("age"), Some(vec!["30", "25"]));
        assert_eq!(reader.column("missing"), None);
    }

    #[test]
    fn quoted_fields_with_commas() {
        let reader = CsvReader::parse(
            "name,address\nAlice,\"123 Main St, Apt 4\"\nBob,\"456 Oak Ave, Suite 5\"",
        );
        assert_eq!(reader.get(0, "address"), Some("123 Main St, Apt 4"));
        assert_eq!(reader.get(1, "address"), Some("456 Oak Ave, Suite 5"));
    }

    #[test]
    fn quoted_fields_with_embedded_quotes() {
        let reader = CsvReader::parse(
            "name,quote\nAlice,\"She said \"\"hello\"\"\"\nBob,\"He said \"\"bye\"\"\"",
        );
        assert_eq!(reader.get(0, "quote"), Some("She said \"hello\""));
        assert_eq!(reader.get(1, "quote"), Some("He said \"bye\""));
    }

    #[test]
    fn quoted_fields_with_newlines() {
        let data = "name,bio\nAlice,\"Line 1\nLine 2\"\nBob,\"One line\"";
        let reader = CsvReader::parse(data);
        assert_eq!(reader.get(0, "bio"), Some("Line 1\nLine 2"));
        assert_eq!(reader.get(1, "bio"), Some("One line"));
        assert_eq!(reader.rows().len(), 2);
    }

    #[test]
    fn custom_delimiter_semicolon() {
        let data = "name;age\nAlice;30\nBob;25";
        let reader = CsvReader::parse(data).delimiter(b';');
        assert_eq!(reader.get(0, "name"), Some("Alice"));
        assert_eq!(reader.get(1, "age"), Some("25"));
    }

    #[test]
    fn custom_delimiter_tab() {
        let data = "name\tage\nAlice\t30\nBob\t25";
        let reader = CsvReader::parse(data).delimiter(b'\t');
        assert_eq!(reader.get(0, "name"), Some("Alice"));
        assert_eq!(reader.get(1, "age"), Some("25"));
    }

    #[test]
    fn delimiter_auto_detection_semicolon() {
        let data = "name;age;city\nAlice;30;NYC\nBob;25;LA";
        let reader = CsvReader::parse(data);
        assert_eq!(reader.headers().map(|h| h.len()), Some(3));
        assert_eq!(reader.get(0, "name"), Some("Alice"));
        assert_eq!(reader.get(0, "city"), Some("NYC"));
    }

    #[test]
    fn delimiter_auto_detection_tab() {
        let data = "name\tage\tcolor\nAlice\t30\tred\nBob\t25\tblue";
        let reader = CsvReader::parse(data);
        assert_eq!(reader.get(0, "age"), Some("30"));
        assert_eq!(reader.get(1, "color"), Some("blue"));
    }

    #[test]
    fn writer_basic_output() {
        let csv = CsvWriter::new()
            .headers(&["name", "score"])
            .row(&["Alice", "95"])
            .row(&["Bob", "87"])
            .render();
        assert_eq!(csv, "name,score\nAlice,95\nBob,87\n");
    }

    #[test]
    fn writer_quotes_fields_that_need_it() {
        let csv = CsvWriter::new()
            .headers(&["name", "address"])
            .row(&["Alice", "123 Main, Apt 4"])
            .row(&["Bob", "said \"hi\""])
            .render();
        assert_eq!(
            csv,
            "name,address\nAlice,\"123 Main, Apt 4\"\nBob,\"said \"\"hi\"\"\"\n"
        );
    }

    #[test]
    fn writer_quotes_fields_with_newlines() {
        let csv = CsvWriter::new()
            .headers(&["k", "v"])
            .row(&["a", "line1\nline2"])
            .render();
        assert_eq!(csv, "k,v\na,\"line1\nline2\"\n");
    }

    #[test]
    fn round_trip() {
        let original = CsvWriter::new()
            .headers(&["name", "value", "note"])
            .row(&["Alice", "42", "first entry"])
            .row(&["Bob", "99", "has, comma"])
            .row(&["Eve", "0", "said \"hi\""])
            .render();

        let reader = CsvReader::parse(&original);
        assert_eq!(
            reader.headers(),
            Some(
                vec!["name".to_string(), "value".to_string(), "note".to_string()].as_slice()
            )
        );
        assert_eq!(reader.get(0, "name"), Some("Alice"));
        assert_eq!(reader.get(1, "note"), Some("has, comma"));
        assert_eq!(reader.get(2, "note"), Some("said \"hi\""));
    }

    #[test]
    fn empty_fields() {
        let reader = CsvReader::parse("a,b,c\n,,\n1,,3");
        assert_eq!(reader.rows()[0], vec!["", "", ""]);
        assert_eq!(reader.rows()[1], vec!["1", "", "3"]);
    }

    #[test]
    fn get_and_column_accessors() {
        let reader = CsvReader::parse("x,y,z\n1,2,3\n4,5,6\n7,8,9");
        assert_eq!(reader.get(0, "x"), Some("1"));
        assert_eq!(reader.get(2, "z"), Some("9"));
        assert_eq!(reader.get(5, "x"), None);
        assert_eq!(reader.column("y"), Some(vec!["2", "5", "8"]));
    }

    #[test]
    fn has_headers_false() {
        let reader = CsvReader::parse("1,2,3\n4,5,6").has_headers(false);
        assert_eq!(reader.headers(), None);
        assert_eq!(reader.rows().len(), 2);
        assert_eq!(reader.rows()[0], vec!["1", "2", "3"]);
    }

    #[test]
    fn file_read_write() {
        // Include the process id in the name so concurrent test runs sharing
        // one temp directory do not overwrite each other's file.
        let file_name = format!("csv_toolkit_test_{}.csv", std::process::id());
        let path = std::env::temp_dir().join(file_name);
        let path_str = path.to_str().expect("temp path should be valid UTF-8");

        // Write
        let write_result = CsvWriter::new()
            .headers(&["a", "b"])
            .row(&["1", "2"])
            .row(&["3", "4"])
            .to_file(path_str);

        // Read back
        let read_result = CsvReader::from_path(path_str);

        // Cleanup happens before any assertion so a failure does not leak the file.
        let _ = std::fs::remove_file(&path);

        write_result.unwrap();
        let reader = read_result.unwrap();
        assert_eq!(reader.headers().map(|h| h.len()), Some(2));
        assert_eq!(reader.get(0, "a"), Some("1"));
        assert_eq!(reader.get(1, "b"), Some("4"));
    }

    #[test]
    fn file_read_nonexistent() {
        let result = CsvReader::from_path("/nonexistent/path/file.csv");
        assert!(result.is_err());
    }

    #[test]
    fn writer_custom_delimiter() {
        let csv = CsvWriter::new()
            .delimiter(b';')
            .headers(&["a", "b"])
            .row(&["1", "2"])
            .render();
        assert_eq!(csv, "a;b\n1;2\n");
    }

    #[test]
    fn writer_no_headers() {
        let csv = CsvWriter::new()
            .row(&["1", "2"])
            .row(&["3", "4"])
            .render();
        assert_eq!(csv, "1,2\n3,4\n");
    }

    #[test]
    fn trailing_newline_optional() {
        // With trailing newline
        let r1 = CsvReader::parse("a,b\n1,2\n");
        assert_eq!(r1.rows().len(), 1);

        // Without trailing newline
        let r2 = CsvReader::parse("a,b\n1,2");
        assert_eq!(r2.rows().len(), 1);

        // Both should give same result
        assert_eq!(r1.rows(), r2.rows());
    }

    #[test]
    fn crlf_line_endings() {
        let reader = CsvReader::parse("a,b\r\n1,2\r\n3,4\r\n");
        assert_eq!(reader.rows().len(), 2);
        assert_eq!(reader.get(0, "a"), Some("1"));
        assert_eq!(reader.get(1, "b"), Some("4"));
    }

    #[test]
    fn single_column() {
        let reader = CsvReader::parse("name\nAlice\nBob");
        assert_eq!(reader.column("name"), Some(vec!["Alice", "Bob"]));
    }

    #[test]
    fn unterminated_quote_is_error() {
        let result = parse_csv("a,\"unclosed\n", b',');
        assert!(result.is_err());
    }
}