table_extractor/parser/
csv.rs1use crate::error::Result;
2use crate::{Parser, Table};
3use csv::ReaderBuilder;
4
/// Parser for delimiter-separated text such as CSV and TSV.
///
/// The field delimiter is a single byte fixed at construction time. Use
/// [`CsvParser::csv`] or [`CsvParser::tsv`] for the two common formats, or
/// [`CsvParser::new`] for any other single-byte delimiter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CsvParser {
    /// Byte that separates adjacent fields within a record.
    delimiter: u8,
}

impl CsvParser {
    /// Creates a parser that splits fields on the given `delimiter` byte.
    #[must_use]
    pub const fn new(delimiter: u8) -> Self {
        Self { delimiter }
    }

    /// Creates a parser for comma-separated values.
    #[must_use]
    pub const fn csv() -> Self {
        Self::new(b',')
    }

    /// Creates a parser for tab-separated values.
    #[must_use]
    pub const fn tsv() -> Self {
        Self::new(b'\t')
    }
}
22
23impl Parser for CsvParser {
24 fn parse(&self, input: &str) -> Result<Table> {
25 let mut reader = ReaderBuilder::new()
26 .delimiter(self.delimiter)
27 .has_headers(true)
28 .from_reader(input.as_bytes());
29
30 let headers = reader
32 .headers()?
33 .iter()
34 .map(|s| s.to_string())
35 .collect::<Vec<_>>();
36
37 let mut rows = Vec::new();
39 for (idx, result) in reader.records().enumerate() {
40 let record = result.map_err(|e| {
41 crate::error::Error::ParseError(format!("CSV row {}: {}", idx + 2, e))
42 })?;
43 let row = record.iter().map(|s| s.to_string()).collect();
44 rows.push(row);
45 }
46
47 Table::new_validated(headers, rows)
48 }
49}
50
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` as comma-separated data, asserts that parsing fails,
    /// and returns the rendered error message.
    fn csv_failure_message(input: &str) -> String {
        let outcome = CsvParser::csv().parse(input);
        assert!(outcome.is_err());
        outcome.unwrap_err().to_string()
    }

    /// Comma-separated input yields the header row plus one row per record.
    #[test]
    fn test_parse_csv() {
        let source = r#"id,name
1,Preston Carlton's Company
2,Fawzia Masud's Company"#;

        let parsed = CsvParser::csv().parse(source).unwrap();
        let rows = parsed.rows();

        assert_eq!(parsed.headers(), &["id", "name"]);
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0], vec!["1", "Preston Carlton's Company"]);
        assert_eq!(rows[1], vec!["2", "Fawzia Masud's Company"]);
    }

    /// Tab-separated input is split on tabs rather than commas.
    #[test]
    fn test_parse_tsv() {
        let source = "id\tname\n1\tAlice\n2\tBob";

        let parsed = CsvParser::tsv().parse(source).unwrap();
        let rows = parsed.rows();

        assert_eq!(parsed.headers(), &["id", "name"]);
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0], vec!["1", "Alice"]);
        assert_eq!(rows[1], vec!["2", "Bob"]);
    }

    /// A short record on the second data line is reported as row 3.
    #[test]
    fn test_csv_error_includes_row_number() {
        let message = csv_failure_message("id,name,email\n1,Alice,alice@example.com\n2,Bob");

        assert!(
            message.contains("CSV row 3"),
            "Error message should include row number: {}",
            message
        );
    }

    /// A short record directly after the header is reported as row 2.
    #[test]
    fn test_csv_error_on_first_data_row() {
        let message = csv_failure_message("id,name,email\n1,Alice");

        assert!(
            message.contains("CSV row 2"),
            "Error message should include row number: {}",
            message
        );
    }

    /// The wrapped message keeps the underlying error text after the row prefix.
    #[test]
    fn test_csv_error_includes_original_error() {
        let message = csv_failure_message(
            "id,name,email\n1,Alice,alice@example.com\n2,Bob,bob@example.com\n3,Charlie",
        );

        assert!(message.contains("CSV row 4"), "Should include row number");
        assert!(
            message.contains("field") || message.contains("2"),
            "Should include field count details"
        );
    }
}