Skip to main content

reliakit_csv/
reader.rs

1//! Strict, bounded CSV reading.
2
3use alloc::string::String;
4use alloc::vec::Vec;
5
6use crate::error::{CsvError, CsvErrorKind, CsvLimitKind};
7use crate::limits::CsvLimits;
8
9/// Parses CSV text into records, using conservative [`CsvLimits`].
10///
11/// Each record is a `Vec<String>` of fields. The result is rectangular: every
12/// record has the same number of fields as the first, or the read fails. See
13/// the [crate] documentation for the exact accepted grammar.
14///
15/// ```
16/// use reliakit_csv::read_str;
17///
18/// assert_eq!(read_str("a,b\n1,2\n").unwrap(), [["a", "b"], ["1", "2"]]);
19/// assert_eq!(read_str("").unwrap(), Vec::<Vec<String>>::new());
20/// ```
21pub fn read_str(input: &str) -> Result<Vec<Vec<String>>, CsvError> {
22    read_str_with_limits(input, &CsvLimits::conservative())
23}
24
25/// Parses CSV text into records with explicit [`CsvLimits`].
26pub fn read_str_with_limits(input: &str, limits: &CsvLimits) -> Result<Vec<Vec<String>>, CsvError> {
27    if input.len() > limits.max_input_bytes() {
28        return Err(CsvError::new(
29            CsvErrorKind::LimitExceeded(CsvLimitKind::InputBytes),
30            0,
31            1,
32            1,
33            0,
34            0,
35        ));
36    }
37
38    let chars: Vec<(usize, char)> = input.char_indices().collect();
39    let mut parser = Parser {
40        input,
41        chars,
42        pos: 0,
43        limits,
44    };
45
46    let mut records: Vec<Vec<String>> = Vec::new();
47    let mut expected_width: Option<usize> = None;
48
49    while parser.pos < parser.chars.len() {
50        let record_index = records.len();
51        if record_index >= limits.max_records() {
52            return parser.err(
53                CsvErrorKind::LimitExceeded(CsvLimitKind::Records),
54                record_index,
55                0,
56            );
57        }
58
59        let record = parser.parse_record(record_index)?;
60
61        match expected_width {
62            None => expected_width = Some(record.len()),
63            Some(width) if record.len() != width => {
64                // Report at the start of the offending record's terminator
65                // position (current parser position), which is just past it.
66                return parser.err(
67                    CsvErrorKind::FieldCountMismatch {
68                        expected: width,
69                        found: record.len(),
70                    },
71                    record_index,
72                    record.len().saturating_sub(1),
73                );
74            }
75            Some(_) => {}
76        }
77
78        records.push(record);
79    }
80
81    Ok(records)
82}
83
/// How a single field ended.
///
/// Returned by the field parsers alongside the field text. `parse_record`
/// keeps reading fields after `Delimiter` and finishes the record on `Record`.
enum FieldEnd {
    /// A `,` delimiter; another field follows.
    Delimiter,
    /// A `\n` or `\r\n` record terminator, or end of input.
    Record,
}
91
/// Parser state: a cursor over the characters of one input string.
struct Parser<'a> {
    /// The original text; used for the end-of-input byte offset and for
    /// computing line/column positions in errors.
    input: &'a str,
    /// Each character of the input paired with its byte offset.
    chars: Vec<(usize, char)>,
    /// Index into `chars` of the next unread character (a character index,
    /// not a byte offset).
    pos: usize,
    /// Limits checked while parsing (fields per record, bytes per field).
    limits: &'a CsvLimits,
}
98
99impl Parser<'_> {
100    /// Builds an error at the current parser position for the given location.
101    fn err<T>(&self, kind: CsvErrorKind, record: usize, field: usize) -> Result<T, CsvError> {
102        let offset = self.offset_at(self.pos);
103        let (line, column) = self.line_col(offset);
104        Err(CsvError::new(kind, offset, line, column, record, field))
105    }
106
107    /// The byte offset for a character index (or end of input if past the end).
108    fn offset_at(&self, index: usize) -> usize {
109        self.chars
110            .get(index)
111            .map(|(offset, _)| *offset)
112            .unwrap_or(self.input.len())
113    }
114
115    /// 1-based line and column for a byte offset.
116    fn line_col(&self, offset: usize) -> (usize, usize) {
117        let mut line = 1;
118        let mut column = 1;
119        for (byte_index, ch) in self.input.char_indices() {
120            if byte_index >= offset {
121                break;
122            }
123            if ch == '\n' {
124                line += 1;
125                column = 1;
126            } else {
127                column += 1;
128            }
129        }
130        (line, column)
131    }
132
133    fn peek(&self) -> Option<char> {
134        self.chars.get(self.pos).map(|(_, c)| *c)
135    }
136
137    fn peek_at(&self, ahead: usize) -> Option<char> {
138        self.chars.get(self.pos + ahead).map(|(_, c)| *c)
139    }
140
141    /// Parses one record (one or more fields). Assumes `self.pos < len`.
142    fn parse_record(&mut self, record_index: usize) -> Result<Vec<String>, CsvError> {
143        let mut record: Vec<String> = Vec::new();
144        loop {
145            let field_index = record.len();
146            if field_index >= self.limits.max_fields_per_record() {
147                return self.err(
148                    CsvErrorKind::LimitExceeded(CsvLimitKind::FieldsPerRecord),
149                    record_index,
150                    field_index,
151                );
152            }
153
154            let (field, end) = self.parse_field(record_index, field_index)?;
155            record.push(field);
156            match end {
157                FieldEnd::Delimiter => continue,
158                FieldEnd::Record => break,
159            }
160        }
161        Ok(record)
162    }
163
164    /// Parses one field and reports how it ended.
165    fn parse_field(
166        &mut self,
167        record_index: usize,
168        field_index: usize,
169    ) -> Result<(String, FieldEnd), CsvError> {
170        if self.peek() == Some('"') {
171            self.parse_quoted_field(record_index, field_index)
172        } else {
173            self.parse_unquoted_field(record_index, field_index)
174        }
175    }
176
177    fn parse_quoted_field(
178        &mut self,
179        record_index: usize,
180        field_index: usize,
181    ) -> Result<(String, FieldEnd), CsvError> {
182        self.pos += 1; // consume the opening quote
183        let mut buf = String::new();
184        loop {
185            let Some(c) = self.peek() else {
186                return self.err(
187                    CsvErrorKind::UnterminatedQuotedField,
188                    record_index,
189                    field_index,
190                );
191            };
192            if c == '"' {
193                if self.peek_at(1) == Some('"') {
194                    // Escaped quote: `""` is one literal `"`.
195                    self.push_field_byte(&mut buf, '"', record_index, field_index)?;
196                    self.pos += 2;
197                } else {
198                    // Closing quote: only a delimiter, terminator, or EOF may follow.
199                    self.pos += 1;
200                    return self.finish_after_quote(record_index, field_index, buf);
201                }
202            } else {
203                self.push_field_byte(&mut buf, c, record_index, field_index)?;
204                self.pos += 1;
205            }
206        }
207    }
208
209    fn finish_after_quote(
210        &mut self,
211        record_index: usize,
212        field_index: usize,
213        buf: String,
214    ) -> Result<(String, FieldEnd), CsvError> {
215        match self.peek() {
216            None => Ok((buf, FieldEnd::Record)),
217            Some(',') => {
218                self.pos += 1;
219                Ok((buf, FieldEnd::Delimiter))
220            }
221            Some('\n') => {
222                self.pos += 1;
223                Ok((buf, FieldEnd::Record))
224            }
225            Some('\r') => {
226                if self.peek_at(1) == Some('\n') {
227                    self.pos += 2;
228                    Ok((buf, FieldEnd::Record))
229                } else {
230                    self.err(CsvErrorKind::BareCarriageReturn, record_index, field_index)
231                }
232            }
233            Some(_) => self.err(
234                CsvErrorKind::TextAfterQuotedField,
235                record_index,
236                field_index,
237            ),
238        }
239    }
240
241    fn parse_unquoted_field(
242        &mut self,
243        record_index: usize,
244        field_index: usize,
245    ) -> Result<(String, FieldEnd), CsvError> {
246        let mut buf = String::new();
247        loop {
248            match self.peek() {
249                None => return Ok((buf, FieldEnd::Record)),
250                Some(',') => {
251                    self.pos += 1;
252                    return Ok((buf, FieldEnd::Delimiter));
253                }
254                Some('\n') => {
255                    self.pos += 1;
256                    return Ok((buf, FieldEnd::Record));
257                }
258                Some('\r') => {
259                    if self.peek_at(1) == Some('\n') {
260                        self.pos += 2;
261                        return Ok((buf, FieldEnd::Record));
262                    }
263                    return self.err(CsvErrorKind::BareCarriageReturn, record_index, field_index);
264                }
265                Some('"') => {
266                    return self.err(
267                        CsvErrorKind::QuoteInUnquotedField,
268                        record_index,
269                        field_index,
270                    )
271                }
272                Some(c) => {
273                    self.push_field_byte(&mut buf, c, record_index, field_index)?;
274                    self.pos += 1;
275                }
276            }
277        }
278    }
279
280    /// Appends a character to a field buffer, enforcing the field byte limit.
281    fn push_field_byte(
282        &self,
283        buf: &mut String,
284        c: char,
285        record_index: usize,
286        field_index: usize,
287    ) -> Result<(), CsvError> {
288        if buf.len() + c.len_utf8() > self.limits.max_field_bytes() {
289            return self.err(
290                CsvErrorKind::LimitExceeded(CsvLimitKind::FieldBytes),
291                record_index,
292                field_index,
293            );
294        }
295        buf.push(c);
296        Ok(())
297    }
298}