1use super::{Config, Error, Field, FieldBuilder, Record, Records, Result, SourcePosition};
21use std::{iter, str};
22
/// Character-level parser state over a single input string.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Parsing options; this file reads its `separator` and `lines_above` fields.
    config: &'a Config,
    /// The remaining input, with one character of lookahead.
    chars: iter::Peekable<str::Chars<'a>>,
    /// Number of record terminators (`\n`) consumed so far.
    source_line: usize,
    /// Column counter for the current line: starts at -1 (nothing consumed yet), is incremented
    /// for every consumed character that is not a record terminator and is reset to 0 when a
    /// record terminator is consumed.
    source_offset: isize,
}
30
/// Outcome of parsing a single field.
#[derive(Debug)]
enum FieldResult {
    /// A field after which the record parser keeps reading more fields of the same record.
    Some(Field),
    /// A field that ends its record; the record parser stops after pushing it.
    Last(Field),
    /// The input ended before any character of a field was read.
    Eof,
}
37
38impl FieldResult {
39 fn some<F: Into<Field>>(f: F) -> Self {
40 Self::Some(f.into())
41 }
42
43 fn last<F: Into<Field>>(f: F) -> Self {
44 Self::Last(f.into())
45 }
46}
47
/// Outcome of parsing a single record (line).
#[derive(Debug)]
enum RecordResult {
    /// The line started with `#` and was skipped.
    Comment,
    /// The input ended before any field of a new record was read.
    Eof,
    /// A parsed record.
    Some(Record),
}
54
55impl From<&mut Parser<'_>> for SourcePosition {
56 fn from(p: &mut Parser) -> Self {
57 if let Ok(o) = usize::try_from(p.source_offset) {
58 Self::new(o, p.source_line + p.config.lines_above)
59 } else {
60 panic!("Attempted to create a SourcePosition before the parser has consumed any characters.")
61 }
62 }
63}
64
/// Returns `true` when `c` ends a record; only a line feed (`\n`) does.
fn is_record_terminator(c: char) -> bool {
    matches!(c, '\n')
}
68
69impl<'a> Parser<'a> {
70 pub(super) fn new(input: &'a str, config: &'a Config) -> Self {
71 Parser {
72 config,
73 chars: input.chars().peekable(),
74 source_line: 0,
75 source_offset: -1,
77 }
78 }
79
80 pub(super) fn parse(&mut self) -> Result<Records> {
81 while self.chars.peek() == Some(&'\n') {
82 self.consume_char();
83 }
84
85 let mut records = vec![];
86 let mut row = 0;
87 loop {
88 match self.parse_record(row)? {
89 RecordResult::Comment => continue,
90 RecordResult::Some(r) => records.push(r),
91 RecordResult::Eof => break,
92 }
93 row += 1;
94 }
95
96 Ok(records)
97 }
98
99 fn is_field_separator(&self, c: char) -> bool {
100 c == self.config.separator
101 }
102
103 fn consume_and_ignore_line(&mut self) {
104 loop {
105 match self.consume_char() {
106 Some('\n') | None => break,
107 Some(_) => continue,
108 }
109 }
110 }
111
112 fn next_is_newline(&mut self) -> bool {
113 self.chars.peek() == Some(&'\n')
114 }
115
116 fn parse_record(&mut self, row: usize) -> Result<RecordResult> {
117 if let Some('#') = self.chars.peek() {
118 self.consume_and_ignore_line();
119 return Ok(RecordResult::Comment);
120 }
121
122 let mut fields = vec![];
123 let mut col = 0;
124 loop {
125 match self.parse_field(FieldBuilder::new((col, row)))? {
126 FieldResult::Some(f) => fields.push(f),
127 FieldResult::Last(f) => {
128 fields.push(f);
129 break;
130 }
131 FieldResult::Eof => {
132 return Ok(if fields.is_empty() {
133 RecordResult::Eof
134 } else {
135 RecordResult::Some(fields)
136 })
137 }
138 };
139 col += 1;
140 }
141
142 Ok(RecordResult::Some(fields))
143 }
144
145 fn consume_whitespace(&mut self) {
146 while let Some(c) = self.chars.peek() {
147 if !is_record_terminator(*c) && c.is_whitespace() {
148 self.consume_char();
149 } else {
150 break;
151 }
152 }
153 }
154
155 fn parse_field(&mut self, mut fb: FieldBuilder) -> Result<FieldResult> {
156 self.consume_whitespace();
157
158 match self.consume_char() {
159 Some(c) if self.is_field_separator(c) => Ok(FieldResult::some(fb)),
160 Some(c) if is_record_terminator(c) => Ok(FieldResult::last(fb)),
161 Some('\\') if self.next_is_newline() => {
162 self.consume_char();
164 self.parse_field(fb)
165 }
166 Some('"') => Ok(self.parse_quoted_field(fb, false)?),
168 Some(c) => {
169 fb.push(c, &mut *self);
170 Ok(self.parse_unquoted_field(fb)?)
171 }
172 None => Ok(FieldResult::Eof),
173 }
174 }
175
176 fn parse_unquoted_field(&mut self, mut fb: FieldBuilder) -> Result<FieldResult> {
177 match self.consume_char() {
178 Some('\\') if self.next_is_newline() => {
179 self.consume_char();
181 self.parse_unquoted_field(fb)
182 }
183 Some(c) if self.is_field_separator(c) => Ok(FieldResult::some(fb)),
184 Some(c) if is_record_terminator(c) => Ok(FieldResult::last(fb)),
185 Some(c) => {
186 fb.push(c, &mut *self);
187 self.parse_unquoted_field(fb)
188 }
189 None => Ok(FieldResult::last(fb)),
190 }
191 }
192
193 fn parse_quoted_field(
194 &mut self,
195 mut fb: FieldBuilder,
196 escape_mode: bool,
197 ) -> Result<FieldResult> {
198 let c = self.consume_char();
199 if escape_mode {
200 if let Some(c) = c {
201 fb.push(c, &mut *self);
202 return self.parse_quoted_field(fb, false);
203 }
204
205 return Err(self.parse_error("Expected a quoted character but got EOF"));
206 }
207
208 match c {
209 Some('"') => {
210 if let Some('"') = self.chars.peek() {
213 self.parse_quoted_field(fb, true)
214 } else {
215 self.parse_rest_of_quoted_field(fb)
218 }
219 }
220 Some(c) => {
221 fb.push(c, &mut *self);
222 self.parse_quoted_field(fb, false)
223 }
224 None => Ok(FieldResult::some(fb)),
225 }
226 }
227
228 fn parse_rest_of_quoted_field(&mut self, fb: FieldBuilder) -> Result<FieldResult> {
232 loop {
233 match self.consume_char() {
234 Some(c) if c.is_whitespace() => continue,
235 Some(c) if self.is_field_separator(c) => return Ok(FieldResult::some(fb)),
236 Some(c) => {
237 return Err(self.parse_error(format!(
239 "Invalid trailing character after quoted string: {c}"
240 )));
241 }
242 None => return Ok(FieldResult::last(fb)),
243 }
244 }
245 }
246
247 fn consume_char(&mut self) -> Option<char> {
248 if let Some(c) = self.chars.next() {
249 if is_record_terminator(c) {
250 self.source_line += 1;
251 self.source_offset = 0;
252 } else {
253 self.source_offset += 1;
254 }
255 Some(c)
256 } else {
257 None
258 }
259 }
260
261 fn parse_error<S: Into<String>>(&mut self, message: S) -> Error {
262 Error::ParseError {
263 bad_input: self.chars.clone().take(10).collect::<String>(),
264 message: message.into(),
265 position: self.into(),
266 }
267 }
268}
269
270pub fn parse<'a>(input: &'a str, config: &'a Config) -> Result<Records> {
276 Parser::new(input, config).parse()
277}
278
#[cfg(test)]
mod tests {
    use super::*;
    use crate::*;

    /// Parses `s` with the default configuration, panicking if parsing fails.
    fn test_parse(s: &str) -> Records {
        parse(s, &Config::default()).unwrap()
    }

    /// After parsing, the parser converts into the position of the last consumed character.
    #[test]
    fn source_position_from_parser() {
        let config = Config::default();
        let mut parser = Parser::new("foo", &config);
        parser.parse().unwrap();
        let source_position: SourcePosition = (&mut parser).into();

        assert_eq!(source_position.line_number, 0);
        assert_eq!(source_position.line_offset, 2);
    }

    /// `lines_above` shifts the reported line number but not the column.
    #[test]
    fn source_position_from_parser_lines_above() {
        let config = Config {
            lines_above: 100,
            ..Config::default()
        };
        let mut parser = Parser::new("foo", &config);
        parser.parse().unwrap();
        let source_position: SourcePosition = (&mut parser).into();

        assert_eq!(source_position.line_number, 100);
        assert_eq!(source_position.line_offset, 2);
    }

    /// Values, addresses and per-character positions of a plain one-line record.
    #[test]
    fn parse_simple() {
        let cells = test_parse("foo,bar,baz");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 3);

        let cell = &cells[0][0];
        assert_eq!(cell.value, "foo");
        assert_eq!(cell.address, (0, 0).into());
        for (i, offset) in [0, 1, 2].iter().enumerate() {
            assert_eq!(cell.positions[i].line_offset, *offset);
            assert_eq!(cell.positions[i].line_number, 0);
        }

        let cell = &cells[0][1];
        assert_eq!(cell.value, "bar");
        assert_eq!(cell.address, (1, 0).into());
        for (i, offset) in [4, 5, 6].iter().enumerate() {
            assert_eq!(cell.positions[i].line_offset, *offset);
            assert_eq!(cell.positions[i].line_number, 0);
        }

        let cell = &cells[0][2];
        assert_eq!(cell.value, "baz");
        assert_eq!(cell.address, (2, 0).into());
        for (i, offset) in [8, 9, 10].iter().enumerate() {
            assert_eq!(cell.positions[i].line_offset, *offset);
            assert_eq!(cell.positions[i].line_number, 0);
        }
    }

    /// Two adjacent separators produce an empty cell that still occupies a column.
    #[test]
    fn parse_empty_cell() {
        let cells = test_parse("foo,,baz");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 3);
        assert_eq!(cells[0][0].value, "foo");
        assert_eq!(cells[0][0].address, (0, 0).into());
        assert_eq!(cells[0][1].value, "");
        assert_eq!(cells[0][1].address, (1, 0).into());
        assert_eq!(cells[0][2].value, "baz");
        assert_eq!(cells[0][2].address, (2, 0).into());
    }

    #[test]
    fn parse_multiple_lines() {
        let cells = test_parse("foo,bar,baz\nfoos,bars,bazs");

        assert_eq!(cells.len(), 2);
        assert_eq!(cells[0].len(), 3);
        for (col, expected) in ["foo", "bar", "baz"].iter().enumerate() {
            assert_eq!(cells[0][col].value, *expected);
        }
        for (col, expected) in ["foos", "bars", "bazs"].iter().enumerate() {
            assert_eq!(cells[1][col].value, *expected);
        }
    }

    /// Surrounding whitespace is dropped from values, while positions still refer to the
    /// columns of the original input.
    #[test]
    fn parse_spaces() {
        // The spacing is chosen so that "foo" occupies columns 3..=5, "bar" columns 12..=14 and
        // "one" columns 19..=21, matching the offsets asserted below.
        let cells = test_parse("   foo   ,  bar   ,one two three");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 3);

        let cell = &cells[0][0];
        assert_eq!(cell.value, "foo");
        for (i, offset) in [3, 4, 5].iter().enumerate() {
            assert_eq!(cell.positions[i].line_offset, *offset);
        }

        let cell = &cells[0][1];
        assert_eq!(cell.value, "bar");
        for (i, offset) in [12, 13, 14].iter().enumerate() {
            assert_eq!(cell.positions[i].line_offset, *offset);
        }

        let cell = &cells[0][2];
        assert_eq!(cell.value, "one two three");
        for (i, offset) in [19, 20, 21].iter().enumerate() {
            assert_eq!(cell.positions[i].line_offset, *offset);
        }
    }

    /// A trailing newline does not produce an extra record.
    #[test]
    fn parse_trailing_newline() {
        let cells = test_parse("foo\nbar\n");

        assert_eq!(cells.len(), 2);
        assert_eq!(cells[0][0].address, (0, 0).into());
        assert_eq!(cells[1][0].address, (0, 1).into());
    }

    /// Leading blank lines are skipped and do not shift the row addresses.
    #[test]
    fn parse_leading_newline() {
        let cells = test_parse("\nfoo\nbar\n");

        assert_eq!(cells.len(), 2);
        assert_eq!(cells[0][0].address, (0, 0).into());
        assert_eq!(cells[1][0].address, (0, 1).into());
    }

    #[test]
    fn parse_windows_newline() {
        let cells = test_parse("foo\r\nbar\r\nbaz\r\n");

        assert_eq!(cells.len(), 3);
    }

    /// Separators inside quotes belong to the value.
    #[test]
    fn parse_quoted() {
        let cells = test_parse(r#""this, is, a, quoted, sentence",bar"#);

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 2);
        assert_eq!(cells[0][0].value, "this, is, a, quoted, sentence");
        assert_eq!(cells[0][1].value, "bar");
    }

    /// A newline inside quotes does not end the record.
    #[test]
    fn parse_quoted_newline() {
        let cells = test_parse("\"this field \n has a newline\",bar");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 2);
        assert_eq!(cells[0][0].value, "this field \n has a newline");
    }

    /// A doubled quote inside a quoted field yields one literal quote.
    #[test]
    fn parse_quoted_quote() {
        let cells = test_parse("\"this field has a quote \"\"\",bar");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 2);
        assert_eq!(cells[0][0].value, "this field has a quote \"");
    }

    /// Lines starting with `#` are not returned as records.
    #[test]
    fn parse_comment() {
        let cells = test_parse("# this is a comment\nfoo,bar\n# another comment");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 2);
        assert_eq!(cells[0][0].value, "foo");
        assert_eq!(cells[0][1].value, "bar");
    }

    /// A backslash directly before a newline continues the field on the next line.
    #[test]
    fn parse_multiline_field() {
        let cells = test_parse("this \\\nspans \\\nmultiple lines");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 1);
        assert_eq!(cells[0][0].value, "this spans multiple lines");
    }

    /// A separator directly before a newline yields a trailing empty cell.
    #[test]
    fn parse_trailing_comma_newline() {
        let cells = test_parse("foo ,\n");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 2);
    }

    /// A separator directly before the end of input does not swallow the last record.
    #[test]
    fn parse_trailing_comma_no_newline() {
        let cells = test_parse(
            r"[[var=a1]]A1,foo,bar
![[f=10]],bar,=var2
foo
[[l]]test,
![[l]]test1,test2,test3,",
        );

        assert_eq!(cells.len(), 5);
    }

    /// Whitespace between a closing quote and the separator is allowed.
    #[test]
    fn parse_ending_quote() {
        let cells = test_parse("\"=profit\" ,\"=fees\"");

        assert_eq!(cells.len(), 1);
        assert_eq!(cells[0].len(), 2);
    }
}