1use alloc::string::String;
4use alloc::vec::Vec;
5
6use crate::error::{CsvError, CsvErrorKind, CsvLimitKind};
7use crate::limits::CsvLimits;
8
9pub fn read_str(input: &str) -> Result<Vec<Vec<String>>, CsvError> {
22 read_str_with_limits(input, &CsvLimits::conservative())
23}
24
25pub fn read_str_with_limits(input: &str, limits: &CsvLimits) -> Result<Vec<Vec<String>>, CsvError> {
27 if input.len() > limits.max_input_bytes() {
28 return Err(CsvError::new(
29 CsvErrorKind::LimitExceeded(CsvLimitKind::InputBytes),
30 0,
31 1,
32 1,
33 0,
34 0,
35 ));
36 }
37
38 let chars: Vec<(usize, char)> = input.char_indices().collect();
39 let mut parser = Parser {
40 input,
41 chars,
42 pos: 0,
43 limits,
44 };
45
46 let mut records: Vec<Vec<String>> = Vec::new();
47 let mut expected_width: Option<usize> = None;
48
49 while parser.pos < parser.chars.len() {
50 let record_index = records.len();
51 if record_index >= limits.max_records() {
52 return parser.err(
53 CsvErrorKind::LimitExceeded(CsvLimitKind::Records),
54 record_index,
55 0,
56 );
57 }
58
59 let record = parser.parse_record(record_index)?;
60
61 match expected_width {
62 None => expected_width = Some(record.len()),
63 Some(width) if record.len() != width => {
64 return parser.err(
67 CsvErrorKind::FieldCountMismatch {
68 expected: width,
69 found: record.len(),
70 },
71 record_index,
72 record.len().saturating_sub(1),
73 );
74 }
75 Some(_) => {}
76 }
77
78 records.push(record);
79 }
80
81 Ok(records)
82}
83
/// What terminated the field that was just parsed.
enum FieldEnd {
    /// A `,` followed the field, so the current record has another field.
    Delimiter,
    /// A line break (`\n` or `\r\n`) or the end of input followed the field,
    /// so the current record is complete.
    Record,
}
91
92struct Parser<'a> {
93 input: &'a str,
94 chars: Vec<(usize, char)>,
95 pos: usize,
96 limits: &'a CsvLimits,
97}
98
99impl Parser<'_> {
100 fn err<T>(&self, kind: CsvErrorKind, record: usize, field: usize) -> Result<T, CsvError> {
102 let offset = self.offset_at(self.pos);
103 let (line, column) = self.line_col(offset);
104 Err(CsvError::new(kind, offset, line, column, record, field))
105 }
106
107 fn offset_at(&self, index: usize) -> usize {
109 self.chars
110 .get(index)
111 .map(|(offset, _)| *offset)
112 .unwrap_or(self.input.len())
113 }
114
115 fn line_col(&self, offset: usize) -> (usize, usize) {
117 let mut line = 1;
118 let mut column = 1;
119 for (byte_index, ch) in self.input.char_indices() {
120 if byte_index >= offset {
121 break;
122 }
123 if ch == '\n' {
124 line += 1;
125 column = 1;
126 } else {
127 column += 1;
128 }
129 }
130 (line, column)
131 }
132
133 fn peek(&self) -> Option<char> {
134 self.chars.get(self.pos).map(|(_, c)| *c)
135 }
136
137 fn peek_at(&self, ahead: usize) -> Option<char> {
138 self.chars.get(self.pos + ahead).map(|(_, c)| *c)
139 }
140
141 fn parse_record(&mut self, record_index: usize) -> Result<Vec<String>, CsvError> {
143 let mut record: Vec<String> = Vec::new();
144 loop {
145 let field_index = record.len();
146 if field_index >= self.limits.max_fields_per_record() {
147 return self.err(
148 CsvErrorKind::LimitExceeded(CsvLimitKind::FieldsPerRecord),
149 record_index,
150 field_index,
151 );
152 }
153
154 let (field, end) = self.parse_field(record_index, field_index)?;
155 record.push(field);
156 match end {
157 FieldEnd::Delimiter => continue,
158 FieldEnd::Record => break,
159 }
160 }
161 Ok(record)
162 }
163
164 fn parse_field(
166 &mut self,
167 record_index: usize,
168 field_index: usize,
169 ) -> Result<(String, FieldEnd), CsvError> {
170 if self.peek() == Some('"') {
171 self.parse_quoted_field(record_index, field_index)
172 } else {
173 self.parse_unquoted_field(record_index, field_index)
174 }
175 }
176
177 fn parse_quoted_field(
178 &mut self,
179 record_index: usize,
180 field_index: usize,
181 ) -> Result<(String, FieldEnd), CsvError> {
182 self.pos += 1; let mut buf = String::new();
184 loop {
185 let Some(c) = self.peek() else {
186 return self.err(
187 CsvErrorKind::UnterminatedQuotedField,
188 record_index,
189 field_index,
190 );
191 };
192 if c == '"' {
193 if self.peek_at(1) == Some('"') {
194 self.push_field_byte(&mut buf, '"', record_index, field_index)?;
196 self.pos += 2;
197 } else {
198 self.pos += 1;
200 return self.finish_after_quote(record_index, field_index, buf);
201 }
202 } else {
203 self.push_field_byte(&mut buf, c, record_index, field_index)?;
204 self.pos += 1;
205 }
206 }
207 }
208
209 fn finish_after_quote(
210 &mut self,
211 record_index: usize,
212 field_index: usize,
213 buf: String,
214 ) -> Result<(String, FieldEnd), CsvError> {
215 match self.peek() {
216 None => Ok((buf, FieldEnd::Record)),
217 Some(',') => {
218 self.pos += 1;
219 Ok((buf, FieldEnd::Delimiter))
220 }
221 Some('\n') => {
222 self.pos += 1;
223 Ok((buf, FieldEnd::Record))
224 }
225 Some('\r') => {
226 if self.peek_at(1) == Some('\n') {
227 self.pos += 2;
228 Ok((buf, FieldEnd::Record))
229 } else {
230 self.err(CsvErrorKind::BareCarriageReturn, record_index, field_index)
231 }
232 }
233 Some(_) => self.err(
234 CsvErrorKind::TextAfterQuotedField,
235 record_index,
236 field_index,
237 ),
238 }
239 }
240
241 fn parse_unquoted_field(
242 &mut self,
243 record_index: usize,
244 field_index: usize,
245 ) -> Result<(String, FieldEnd), CsvError> {
246 let mut buf = String::new();
247 loop {
248 match self.peek() {
249 None => return Ok((buf, FieldEnd::Record)),
250 Some(',') => {
251 self.pos += 1;
252 return Ok((buf, FieldEnd::Delimiter));
253 }
254 Some('\n') => {
255 self.pos += 1;
256 return Ok((buf, FieldEnd::Record));
257 }
258 Some('\r') => {
259 if self.peek_at(1) == Some('\n') {
260 self.pos += 2;
261 return Ok((buf, FieldEnd::Record));
262 }
263 return self.err(CsvErrorKind::BareCarriageReturn, record_index, field_index);
264 }
265 Some('"') => {
266 return self.err(
267 CsvErrorKind::QuoteInUnquotedField,
268 record_index,
269 field_index,
270 )
271 }
272 Some(c) => {
273 self.push_field_byte(&mut buf, c, record_index, field_index)?;
274 self.pos += 1;
275 }
276 }
277 }
278 }
279
280 fn push_field_byte(
282 &self,
283 buf: &mut String,
284 c: char,
285 record_index: usize,
286 field_index: usize,
287 ) -> Result<(), CsvError> {
288 if buf.len() + c.len_utf8() > self.limits.max_field_bytes() {
289 return self.err(
290 CsvErrorKind::LimitExceeded(CsvLimitKind::FieldBytes),
291 record_index,
292 field_index,
293 );
294 }
295 buf.push(c);
296 Ok(())
297 }
298}