// table_extractor/parser/csv.rs

use crate::error::Result;
use crate::{Parser, Table};
use csv::ReaderBuilder;
4
/// Parser for delimiter-separated text such as CSV and TSV.
///
/// The only configuration is the single-byte field delimiter, chosen at
/// construction time.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvParser {
    /// Byte that separates fields within a record.
    delimiter: u8,
}

impl CsvParser {
    /// Creates a parser that splits fields on `delimiter`.
    pub fn new(delimiter: u8) -> Self {
        Self { delimiter }
    }

    /// Creates a parser for comma-separated input.
    pub fn csv() -> Self {
        Self::new(b',')
    }

    /// Creates a parser for tab-separated input.
    pub fn tsv() -> Self {
        Self::new(b'\t')
    }
}
22
23impl Parser for CsvParser {
24    fn parse(&self, input: &str) -> Result<Table> {
25        let mut reader = ReaderBuilder::new()
26            .delimiter(self.delimiter)
27            .has_headers(true)
28            .from_reader(input.as_bytes());
29
30        // Get headers
31        let headers = reader
32            .headers()?
33            .iter()
34            .map(|s| s.to_string())
35            .collect::<Vec<_>>();
36
37        // Get rows with row number tracking for better error messages
38        let mut rows = Vec::new();
39        for (idx, result) in reader.records().enumerate() {
40            let record = result.map_err(|e| {
41                crate::error::Error::ParseError(format!("CSV row {}: {}", idx + 2, e))
42            })?;
43            let row = record.iter().map(|s| s.to_string()).collect();
44            rows.push(row);
45        }
46
47        Table::new_validated(headers, rows)
48    }
49}
50
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as comma-separated text, requires that parsing fails,
    /// and returns the rendered error message.
    fn csv_error_message(input: &str) -> String {
        match CsvParser::csv().parse(input) {
            Err(err) => err.to_string(),
            Ok(_) => panic!("expected parsing to fail for input: {:?}", input),
        }
    }

    #[test]
    fn test_parse_csv() {
        let input = "id,name\n1,Preston Carlton's Company\n2,Fawzia Masud's Company";

        let parser = CsvParser::csv();
        let table = parser.parse(input).unwrap();

        assert_eq!(table.headers(), &["id", "name"]);
        assert_eq!(table.rows().len(), 2);
        assert_eq!(table.rows()[0], vec!["1", "Preston Carlton's Company"]);
        assert_eq!(table.rows()[1], vec!["2", "Fawzia Masud's Company"]);
    }

    #[test]
    fn test_parse_tsv() {
        let input = "id\tname\n1\tAlice\n2\tBob";

        let parser = CsvParser::tsv();
        let table = parser.parse(input).unwrap();

        assert_eq!(table.headers(), &["id", "name"]);
        assert_eq!(table.rows().len(), 2);
        assert_eq!(table.rows()[0], vec!["1", "Alice"]);
        assert_eq!(table.rows()[1], vec!["2", "Bob"]);
    }

    #[test]
    fn test_csv_error_includes_row_number() {
        // Header is row 1, the first data row (row 2) is well formed, and the
        // second data row (row 3) is missing a field.
        let err_msg = csv_error_message("id,name,email\n1,Alice,alice@example.com\n2,Bob");

        assert!(
            err_msg.contains("CSV row 3"),
            "Error message should include row number: {}",
            err_msg
        );
    }

    #[test]
    fn test_csv_error_on_first_data_row() {
        // The very first data row (row 2, directly after the header) is short.
        let err_msg = csv_error_message("id,name,email\n1,Alice");

        assert!(
            err_msg.contains("CSV row 2"),
            "Error message should include row number: {}",
            err_msg
        );
    }

    #[test]
    fn test_csv_error_includes_original_error() {
        // Two well-formed data rows followed by a short one on row 4.
        let err_msg = csv_error_message(
            "id,name,email\n1,Alice,alice@example.com\n2,Bob,bob@example.com\n3,Charlie",
        );

        // The message must carry both our row prefix and the underlying
        // reader error.
        assert!(err_msg.contains("CSV row 4"), "Should include row number");
        // NOTE(review): the `"2"` alternative is a loose match; consider
        // tightening once the exact upstream wording is pinned down.
        assert!(
            err_msg.contains("field") || err_msg.contains("2"),
            "Should include field count details"
        );
    }
}