1use carta_ast::{
13 Alignment, Attr, Block, Caption, Cell, ColSpec, ColWidth, Document, Inline, Row, Table,
14 TableBody, TableFoot, TableHead,
15};
16use carta_core::{Reader, ReaderOptions, Result};
17
/// Reader for comma-separated text.
///
/// Its [`Reader`] implementation parses the input with `,` as the delimiter and
/// double-quote quoting enabled, then wraps the records in a single table.
#[derive(Clone, Copy, Debug, Default)]
pub struct CsvReader;
21
22impl Reader for CsvReader {
23 fn read(&self, input: &str, _options: &ReaderOptions) -> Result<Document> {
24 Ok(build_document(parse_records(input, ',', true)))
25 }
26}
27
/// Splits delimited text into records, each a list of unescaped field strings.
///
/// * A leading byte-order mark (`U+FEFF`) is ignored.
/// * Records end at `\n`, `\r\n`, or a bare `\r`. A final record that is not
///   followed by a line ending is still returned.
/// * After every delimiter, spaces and tabs are skipped (a tab is not skipped when
///   it is itself the delimiter). Blanks before the first field of a record are kept.
/// * When `quoting` is true, a `"` seen while the current field is still empty opens
///   a quoted section in which delimiters and line endings are literal and `""`
///   stands for a single `"`. With `quoting` false, quotes are ordinary characters.
///
/// Every returned record contains at least one field; a blank line yields a record
/// with one empty field. Empty input yields no records.
pub(crate) fn parse_records(input: &str, delimiter: char, quoting: bool) -> Vec<Vec<String>> {
    let mut records = Vec::new();
    let mut record = Vec::new();
    let mut field = String::new();
    // True once a quoted section has been read for the current field. An empty quoted
    // field (`""`) leaves `field` empty, so this flag is the only evidence that a field
    // exists when the input ends without a line terminator.
    let mut saw_quoted = false;
    let mut chars = input
        .strip_prefix('\u{feff}')
        .unwrap_or(input)
        .chars()
        .peekable();

    while let Some(c) = chars.next() {
        match c {
            '"' if quoting && field.is_empty() => {
                read_quoted_field(&mut chars, &mut field);
                saw_quoted = true;
            }
            c if c == delimiter => {
                record.push(std::mem::take(&mut field));
                saw_quoted = false;
                skip_leading_blanks(&mut chars, delimiter);
            }
            '\r' | '\n' => {
                if c == '\r' {
                    // CRLF counts as one terminator, not two.
                    chars.next_if_eq(&'\n');
                }
                record.push(std::mem::take(&mut field));
                records.push(std::mem::take(&mut record));
                saw_quoted = false;
            }
            other => field.push(other),
        }
    }

    // Flush a trailing record that had no line ending. The `saw_quoted` check keeps a
    // last record consisting solely of `""`, which would otherwise look like "nothing
    // pending" and be dropped.
    if saw_quoted || !field.is_empty() || !record.is_empty() {
        record.push(field);
        records.push(record);
    }

    records
}

/// Consumes the spaces and tabs that directly follow a delimiter.
///
/// A blank equal to `delimiter` is left in place so that consecutive tab delimiters
/// still produce empty fields.
fn skip_leading_blanks(chars: &mut std::iter::Peekable<std::str::Chars<'_>>, delimiter: char) {
    while chars
        .next_if(|&c| (c == ' ' || c == '\t') && c != delimiter)
        .is_some()
    {}
}

/// Appends the contents of a quoted section to `field`.
///
/// Must be called after the opening `"` has been consumed. Reads up to and including
/// the closing `"`, turning each `""` into one literal `"`. If the input ends before
/// a closing quote is found, everything read so far is kept and the function returns.
fn read_quoted_field(chars: &mut std::iter::Peekable<std::str::Chars<'_>>, field: &mut String) {
    while let Some(c) = chars.next() {
        if c != '"' {
            field.push(c);
            continue;
        }
        // A quote followed by another quote is an escaped quote; a lone quote closes
        // the section.
        if chars.next_if_eq(&'"').is_none() {
            return;
        }
        field.push('"');
    }
}
102
103pub(crate) fn build_document(records: Vec<Vec<String>>) -> Document {
106 let mut records = records.into_iter();
107 let Some(header) = records.next() else {
108 return Document::default();
109 };
110
111 let column_count = header.len();
112 let col_specs = (0..column_count)
113 .map(|_| ColSpec {
114 align: Alignment::AlignDefault,
115 width: ColWidth::ColWidthDefault,
116 })
117 .collect();
118
119 let head = TableHead {
120 attr: Attr::default(),
121 rows: vec![field_row(header, column_count)],
122 };
123 let body_rows = records
124 .map(|record| field_row(record, column_count))
125 .collect();
126 let body = TableBody {
127 attr: Attr::default(),
128 row_head_columns: 0,
129 head: Vec::new(),
130 body: body_rows,
131 };
132
133 let table = Table {
134 attr: Attr::default(),
135 caption: Caption::default(),
136 col_specs,
137 head,
138 bodies: vec![body],
139 foot: TableFoot::default(),
140 };
141
142 Document {
143 blocks: vec![Block::Table(Box::new(table))],
144 ..Default::default()
145 }
146}
147
148fn field_row(fields: Vec<String>, column_count: usize) -> Row {
151 let mut cells: Vec<Cell> = fields
152 .into_iter()
153 .take(column_count)
154 .map(|field| field_cell(&field))
155 .collect();
156 while cells.len() < column_count {
157 cells.push(field_cell(""));
158 }
159 Row {
160 attr: Attr::default(),
161 cells,
162 }
163}
164
165fn field_cell(field: &str) -> Cell {
166 let inlines = field_inlines(field);
167 let content = if inlines.is_empty() {
168 Vec::new()
169 } else {
170 vec![Block::Plain(inlines)]
171 };
172 Cell {
173 attr: Attr::default(),
174 align: Alignment::AlignDefault,
175 row_span: 1,
176 col_span: 1,
177 content,
178 }
179}
180
181fn field_inlines(field: &str) -> Vec<Inline> {
185 let cleaned: String = field.chars().filter(|&c| c != '\r').collect();
186 let cleaned = match cleaned.strip_suffix('\n') {
187 Some(trimmed) => trimmed,
188 None => &cleaned,
189 };
190
191 let mut inlines = Vec::new();
192 let mut chars = cleaned.chars().peekable();
193 while let Some(&c) = chars.peek() {
194 if is_separator(c) {
195 let mut newlines = 0;
196 while let Some(&w) = chars.peek() {
197 if w == '\n' {
198 newlines += 1;
199 chars.next();
200 } else if is_separator(w) {
201 chars.next();
202 } else {
203 break;
204 }
205 }
206 if newlines == 0 {
207 inlines.push(Inline::Space);
208 } else {
209 for _ in 0..newlines {
210 inlines.push(Inline::LineBreak);
211 }
212 }
213 } else {
214 let mut word = String::new();
215 while let Some(&w) = chars.peek() {
216 if is_separator(w) {
217 break;
218 }
219 word.push(w);
220 chars.next();
221 }
222 inlines.push(Inline::Str(word.into()));
223 }
224 }
225
226 inlines
227}
228
/// Returns `true` for the three characters that separate words inside a field:
/// space, tab, and newline. No other whitespace character counts.
fn is_separator(c: char) -> bool {
    [' ', '\t', '\n'].contains(&c)
}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Maps each inline to the name of its variant so expectations stay readable.
    fn tags(inlines: &[Inline]) -> Vec<&'static str> {
        inlines
            .iter()
            .map(|inline| match inline {
                Inline::Str(_) => "Str",
                Inline::Space => "Space",
                Inline::LineBreak => "LineBreak",
                _ => "other",
            })
            .collect()
    }

    /// Builds one owned record from string literals.
    fn rec(fields: &[&str]) -> Vec<String> {
        fields.iter().map(|&text| text.to_owned()).collect()
    }

    #[test]
    fn collapses_whitespace_runs_to_single_space() {
        assert_eq!(tags(&field_inlines("x y")), ["Str", "Space", "Str"]);
        assert_eq!(tags(&field_inlines("x\ty")), ["Str", "Space", "Str"]);
        // Real runs: several blanks, and a mix of spaces and tabs.
        assert_eq!(tags(&field_inlines("x   y")), ["Str", "Space", "Str"]);
        assert_eq!(tags(&field_inlines("x \t y")), ["Str", "Space", "Str"]);
    }

    #[test]
    fn keeps_leading_and_trailing_space_around_words() {
        assert_eq!(tags(&field_inlines(" x ")), ["Space", "Str", "Space"]);
    }

    #[test]
    fn pure_whitespace_field_is_one_space() {
        assert_eq!(tags(&field_inlines(" ")), ["Space"]);
        assert_eq!(tags(&field_inlines(" \t ")), ["Space"]);
    }

    #[test]
    fn embedded_newlines_become_line_breaks() {
        assert_eq!(tags(&field_inlines("x\ny")), ["Str", "LineBreak", "Str"]);
        assert_eq!(
            tags(&field_inlines("x\n\ny")),
            ["Str", "LineBreak", "LineBreak", "Str"]
        );
    }

    #[test]
    fn single_trailing_newline_is_dropped() {
        assert!(field_inlines("\n").is_empty());
        assert_eq!(tags(&field_inlines(" \n")), ["Space"]);
        assert_eq!(tags(&field_inlines("\n ")), ["LineBreak"]);
    }

    #[test]
    fn carriage_returns_are_removed() {
        assert_eq!(tags(&field_inlines("x\ry")), ["Str"]);
        assert_eq!(tags(&field_inlines("x\r\ny")), ["Str", "LineBreak", "Str"]);
    }

    #[test]
    fn non_ascii_whitespace_stays_in_word() {
        assert_eq!(tags(&field_inlines("x\u{a0}y")), ["Str"]);
    }

    #[test]
    fn quoting_protects_delimiter_and_escapes_quote() {
        let records = parse_records("\"a,b\",\"c\"\"d\"\n", ',', true);
        assert_eq!(records, vec![rec(&["a,b", "c\"d"])]);
    }

    #[test]
    fn tab_records_keep_quotes_literal() {
        let records = parse_records("\"a\"\tb\n", '\t', false);
        assert_eq!(records, vec![rec(&["\"a\"", "b"])]);
    }

    #[test]
    fn leading_blanks_after_delimiter_are_skipped() {
        let records = parse_records("a, b,\tc\n", ',', true);
        assert_eq!(records, vec![rec(&["a", "b", "c"])]);
    }

    #[test]
    fn first_field_keeps_leading_blanks() {
        let records = parse_records(" a,b\n", ',', true);
        assert_eq!(records, vec![rec(&[" a", "b"])]);
    }

    #[test]
    fn crlf_and_bare_lf_both_end_records() {
        let records = parse_records("a,b\r\nc,d\ne,f", ',', true);
        assert_eq!(records.len(), 3);
        // Check contents too, so a stray `\r` or a merged line cannot slip through.
        assert_eq!(
            records,
            vec![rec(&["a", "b"]), rec(&["c", "d"]), rec(&["e", "f"])]
        );
    }

    #[test]
    fn bare_carriage_return_ends_record() {
        let records = parse_records("a\rb", ',', true);
        assert_eq!(records, vec![rec(&["a"]), rec(&["b"])]);
    }

    #[test]
    fn empty_input_yields_empty_document() {
        assert!(
            build_document(parse_records("", ',', true))
                .blocks
                .is_empty()
        );
    }

    #[test]
    fn leading_byte_order_mark_is_stripped() {
        let records = parse_records("\u{feff}a,b\n", ',', true);
        assert_eq!(records, vec![rec(&["a", "b"])]);
    }
}