1use std::{borrow::Cow, string::String as StdString};
2
3use crate::{
4 error, utils,
5 visitor::{
6 DictionaryVisitor, EntryVisitor, InnerListVisitor, ItemVisitor, ListVisitor,
7 ParameterVisitor,
8 },
9 BareItemFromInput, Date, Decimal, Integer, KeyRef, Num, SFVResult, String, StringRef, TokenRef,
10 Version,
11};
12
13fn parse_item<'de>(
14 parser: &mut Parser<'de>,
15 visitor: impl ItemVisitor<'de>,
16) -> Result<(), error::Repr> {
17 let param_visitor = visitor.bare_item(parser.parse_bare_item()?)?;
19 parser.parse_parameters(param_visitor)
20}
21
22fn parse_comma_separated<'de>(
23 parser: &mut Parser<'de>,
24 mut parse_member: impl FnMut(&mut Parser<'de>) -> Result<(), error::Repr>,
25) -> Result<(), error::Repr> {
26 while parser.peek().is_some() {
27 parse_member(parser)?;
28
29 parser.consume_ows_chars();
30
31 if parser.peek().is_none() {
32 return Ok(());
33 }
34
35 let comma_index = parser.index;
36
37 if let Some(c) = parser.peek() {
38 if c != b',' {
39 return Err(error::Repr::TrailingCharactersAfterMember(parser.index));
40 }
41 parser.next();
42 }
43
44 parser.consume_ows_chars();
45
46 if parser.peek().is_none() {
47 return Err(error::Repr::TrailingComma(comma_index));
50 }
51 }
52
53 Ok(())
54}
55
/// Parser over a borrowed byte buffer.
///
/// The public `parse*` entry points take the parser by value, so each
/// instance drives a single parse.
#[must_use]
pub struct Parser<'de> {
    /// Complete input being parsed.
    input: &'de [u8],
    /// Offset into `input` of the next unread byte.
    index: usize,
    /// Specification version in effect; it decides whether dates and display
    /// strings are accepted.
    version: Version,
}
63
64impl<'de> Parser<'de> {
65 pub fn new(input: &'de (impl ?Sized + AsRef<[u8]>)) -> Self {
67 Self {
68 input: input.as_ref(),
69 index: 0,
70 version: Version::Rfc9651,
71 }
72 }
73
74 pub fn with_version(mut self, version: Version) -> Self {
76 self.version = version;
77 self
78 }
79
80 #[cfg(feature = "parsed-types")]
85 pub fn parse<T: crate::FieldType>(self) -> SFVResult<T> {
86 T::parse(self)
87 }
88
89 #[cfg_attr(
92 feature = "parsed-types",
93 doc = r#"
94
95This can also be used to parse a dictionary that is split into multiple lines by merging
96them into an existing structure:
97
98```
99# use sfv::{Dictionary, FieldType, Parser};
100# fn main() -> Result<(), sfv::Error> {
101let mut dict: Dictionary = Parser::new("a=1").parse()?;
102
103Parser::new("b=2").parse_dictionary_with_visitor(&mut dict)?;
104
105assert_eq!(
106 dict.serialize().as_deref(),
107 Some("a=1, b=2"),
108);
109# Ok(())
110# }
111```
112"#
113 )]
114 pub fn parse_dictionary_with_visitor(
118 self,
119 visitor: &mut (impl ?Sized + DictionaryVisitor<'de>),
120 ) -> SFVResult<()> {
121 self.parse_internal(move |parser| {
123 parse_comma_separated(parser, |parser| {
124 let entry_visitor = visitor.entry(parser.parse_key()?)?;
126
127 if let Some(b'=') = parser.peek() {
128 parser.next();
129 parser.parse_list_entry(entry_visitor)
130 } else {
131 let param_visitor = entry_visitor.bare_item(BareItemFromInput::from(true))?;
132 parser.parse_parameters(param_visitor)
133 }
134 })
135 })
136 }
137
138 #[allow(clippy::needless_raw_string_hashes)] #[cfg_attr(
142 feature = "parsed-types",
143 doc = r##"
144
145This can also be used to parse a list that is split into multiple lines by merging them
146into an existing structure:
147```
148# use sfv::{FieldType, List, Parser};
149# fn main() -> Result<(), sfv::Error> {
150let mut list: List = Parser::new("11, (12 13)").parse()?;
151
152Parser::new(r#""foo", "bar""#).parse_list_with_visitor(&mut list)?;
153
154assert_eq!(
155 list.serialize().as_deref(),
156 Some(r#"11, (12 13), "foo", "bar""#),
157);
158# Ok(())
159# }
160```
161"##
162 )]
163 pub fn parse_list_with_visitor(
167 self,
168 visitor: &mut (impl ?Sized + ListVisitor<'de>),
169 ) -> SFVResult<()> {
170 self.parse_internal(|parser| {
172 parse_comma_separated(parser, |parser| parser.parse_list_entry(visitor.entry()?))
173 })
174 }
175
176 pub fn parse_item_with_visitor(self, visitor: impl ItemVisitor<'de>) -> SFVResult<()> {
182 self.parse_internal(|parser| parse_item(parser, visitor))
183 }
184
185 fn peek(&self) -> Option<u8> {
186 self.input.get(self.index).copied()
187 }
188
189 fn next(&mut self) -> Option<u8> {
190 self.peek().inspect(|_| self.index += 1)
191 }
192
193 fn parse_internal(
196 mut self,
197 f: impl FnOnce(&mut Self) -> Result<(), error::Repr>,
198 ) -> SFVResult<()> {
199 self.consume_sp_chars();
202
203 f(&mut self)?;
204
205 self.consume_sp_chars();
206
207 if self.peek().is_some() {
208 return Err(error::Repr::TrailingCharactersAfterParsedValue(self.index).into());
209 }
210
211 Ok(())
212 }
213
214 fn parse_list_entry(&mut self, visitor: impl EntryVisitor<'de>) -> Result<(), error::Repr> {
215 match self.peek() {
219 Some(b'(') => self.parse_inner_list(visitor.inner_list()?),
220 _ => parse_item(self, visitor),
221 }
222 }
223
224 pub(crate) fn parse_inner_list(
225 &mut self,
226 mut visitor: impl InnerListVisitor<'de>,
227 ) -> Result<(), error::Repr> {
228 if Some(b'(') != self.peek() {
231 return Err(error::Repr::ExpectedStartOfInnerList(self.index));
232 }
233
234 self.next();
235
236 while self.peek().is_some() {
237 self.consume_sp_chars();
238
239 if Some(b')') == self.peek() {
240 self.next();
241 let param_visitor = visitor.finish()?;
242 return self.parse_parameters(param_visitor);
243 }
244
245 parse_item(self, visitor.item()?)?;
246
247 if let Some(c) = self.peek() {
248 if c != b' ' && c != b')' {
249 return Err(error::Repr::ExpectedInnerListDelimiter(self.index));
250 }
251 }
252 }
253
254 Err(error::Repr::UnterminatedInnerList(self.index))
255 }
256
257 pub(crate) fn parse_bare_item(&mut self) -> Result<BareItemFromInput<'de>, error::Repr> {
258 Ok(match self.peek() {
261 Some(b'?') => BareItemFromInput::Boolean(self.parse_bool()?),
262 Some(b'"') => BareItemFromInput::String(self.parse_string()?),
263 Some(b':') => BareItemFromInput::ByteSequence(self.parse_byte_sequence()?),
264 Some(b'@') => BareItemFromInput::Date(self.parse_date()?),
265 Some(b'%') => BareItemFromInput::DisplayString(self.parse_display_string()?),
266 Some(c) if utils::is_allowed_start_token_char(c) => {
267 BareItemFromInput::Token(self.parse_token()?)
268 }
269 Some(c) if c == b'-' || c.is_ascii_digit() => match self.parse_number()? {
270 Num::Decimal(val) => BareItemFromInput::Decimal(val),
271 Num::Integer(val) => BareItemFromInput::Integer(val),
272 },
273 _ => return Err(error::Repr::ExpectedStartOfBareItem(self.index)),
274 })
275 }
276
277 pub(crate) fn parse_bool(&mut self) -> Result<bool, error::Repr> {
278 if self.peek() != Some(b'?') {
281 return Err(error::Repr::ExpectedStartOfBoolean(self.index));
282 }
283
284 self.next();
285
286 match self.peek() {
287 Some(b'0') => {
288 self.next();
289 Ok(false)
290 }
291 Some(b'1') => {
292 self.next();
293 Ok(true)
294 }
295 _ => Err(error::Repr::ExpectedBoolean(self.index)),
296 }
297 }
298
299 pub(crate) fn parse_string(&mut self) -> Result<Cow<'de, StringRef>, error::Repr> {
300 if self.peek() != Some(b'"') {
303 return Err(error::Repr::ExpectedStartOfString(self.index));
304 }
305
306 self.next();
307
308 let start = self.index;
309 let mut output = Cow::Borrowed(&[] as &[u8]);
310
311 while let Some(curr_char) = self.peek() {
312 match curr_char {
313 b'"' => {
314 self.next();
315 return Ok(match output {
318 Cow::Borrowed(output) => {
319 let output = std::str::from_utf8(output).unwrap();
320 Cow::Borrowed(StringRef::from_str(output).unwrap())
321 }
322 Cow::Owned(output) => {
323 let output = StdString::from_utf8(output).unwrap();
324 Cow::Owned(String::from_string(output).unwrap())
325 }
326 });
327 }
328 0x00..=0x1f | 0x7f..=0xff => {
329 return Err(error::Repr::InvalidStringCharacter(self.index));
330 }
331 b'\\' => {
332 self.next();
333 match self.peek() {
334 Some(c @ (b'\\' | b'"')) => {
335 self.next();
336 output.to_mut().push(c);
337 }
338 None => return Err(error::Repr::UnterminatedEscapeSequence(self.index)),
339 Some(_) => return Err(error::Repr::InvalidEscapeSequence(self.index)),
340 }
341 }
342 _ => {
343 self.next();
344 match output {
345 Cow::Borrowed(ref mut output) => *output = &self.input[start..self.index],
346 Cow::Owned(ref mut output) => output.push(curr_char),
347 }
348 }
349 }
350 }
351 Err(error::Repr::UnterminatedString(self.index))
352 }
353
354 fn parse_non_empty_str(
355 &mut self,
356 is_allowed_start_char: impl FnOnce(u8) -> bool,
357 is_allowed_inner_char: impl Fn(u8) -> bool,
358 ) -> Option<&'de str> {
359 let start = self.index;
360
361 match self.peek() {
362 Some(c) if is_allowed_start_char(c) => {
363 self.next();
364 }
365 _ => return None,
366 }
367
368 loop {
369 match self.peek() {
370 Some(c) if is_allowed_inner_char(c) => {
371 self.next();
372 }
373 _ => return Some(std::str::from_utf8(&self.input[start..self.index]).unwrap()),
376 }
377 }
378 }
379
380 pub(crate) fn parse_token(&mut self) -> Result<&'de TokenRef, error::Repr> {
381 match self.parse_non_empty_str(
384 utils::is_allowed_start_token_char,
385 utils::is_allowed_inner_token_char,
386 ) {
387 None => Err(error::Repr::ExpectedStartOfToken(self.index)),
388 Some(str) => Ok(TokenRef::from_validated_str(str)),
389 }
390 }
391
392 pub(crate) fn parse_byte_sequence(&mut self) -> Result<Vec<u8>, error::Repr> {
393 if self.peek() != Some(b':') {
396 return Err(error::Repr::ExpectedStartOfByteSequence(self.index));
397 }
398
399 self.next();
400 let start = self.index;
401
402 loop {
403 match self.next() {
404 Some(b':') => break,
405 Some(_) => {}
406 None => return Err(error::Repr::UnterminatedByteSequence(self.index)),
407 }
408 }
409
410 let colon_index = self.index - 1;
411
412 match base64::Engine::decode(&utils::BASE64, &self.input[start..colon_index]) {
413 Ok(content) => Ok(content),
414 Err(err) => {
415 let index = match err {
416 base64::DecodeError::InvalidByte(offset, _)
417 | base64::DecodeError::InvalidLastSymbol(offset, _) => start + offset,
418 base64::DecodeError::InvalidLength(_) | base64::DecodeError::InvalidPadding => {
422 colon_index - 1
423 }
424 };
425
426 Err(error::Repr::InvalidByteSequence(index))
427 }
428 }
429 }
430
431 pub(crate) fn parse_number(&mut self) -> Result<Num, error::Repr> {
432 fn char_to_i64(c: u8) -> i64 {
435 i64::from(c - b'0')
436 }
437
438 let sign = if let Some(b'-') = self.peek() {
439 self.next();
440 -1
441 } else {
442 1
443 };
444
445 let mut magnitude = match self.peek() {
446 Some(c @ b'0'..=b'9') => {
447 self.next();
448 char_to_i64(c)
449 }
450 _ => return Err(error::Repr::ExpectedDigit(self.index)),
451 };
452
453 let mut digits = 1;
454
455 loop {
456 match self.peek() {
457 Some(b'.') => {
458 if digits > 12 {
459 return Err(error::Repr::TooManyDigitsBeforeDecimalPoint(self.index));
460 }
461 self.next();
462 break;
463 }
464 Some(c @ b'0'..=b'9') => {
465 digits += 1;
466 if digits > 15 {
467 return Err(error::Repr::TooManyDigits(self.index));
468 }
469 self.next();
470 magnitude = magnitude * 10 + char_to_i64(c);
471 }
472 _ => return Ok(Num::Integer(Integer::try_from(sign * magnitude).unwrap())),
473 }
474 }
475
476 magnitude *= 1000;
477 let mut scale = 100;
478
479 while let Some(c @ b'0'..=b'9') = self.peek() {
480 if scale == 0 {
481 return Err(error::Repr::TooManyDigitsAfterDecimalPoint(self.index));
482 }
483
484 self.next();
485 magnitude += char_to_i64(c) * scale;
486 scale /= 10;
487 }
488
489 if scale == 100 {
490 Err(error::Repr::TrailingDecimalPoint(self.index - 1))
493 } else {
494 Ok(Num::Decimal(Decimal::from_integer_scaled_1000(
495 Integer::try_from(sign * magnitude).unwrap(),
496 )))
497 }
498 }
499
500 pub(crate) fn parse_date(&mut self) -> Result<Date, error::Repr> {
501 if self.peek() != Some(b'@') {
504 return Err(error::Repr::ExpectedStartOfDate(self.index));
505 }
506
507 match self.version {
508 Version::Rfc8941 => return Err(error::Repr::Rfc8941Date(self.index)),
509 Version::Rfc9651 => {}
510 }
511
512 let start = self.index;
513 self.next();
514
515 match self.parse_number()? {
516 Num::Integer(seconds) => Ok(Date::from_unix_seconds(seconds)),
517 Num::Decimal(_) => Err(error::Repr::NonIntegerDate(start)),
518 }
519 }
520
521 pub(crate) fn parse_display_string(&mut self) -> Result<Cow<'de, str>, error::Repr> {
522 if self.peek() != Some(b'%') {
525 return Err(error::Repr::ExpectedStartOfDisplayString(self.index));
526 }
527
528 match self.version {
529 Version::Rfc8941 => return Err(error::Repr::Rfc8941DisplayString(self.index)),
530 Version::Rfc9651 => {}
531 }
532
533 self.next();
534
535 if self.peek() != Some(b'"') {
536 return Err(error::Repr::ExpectedQuote(self.index));
537 }
538
539 self.next();
540
541 let start = self.index;
542 let mut output = Cow::Borrowed(&[] as &[u8]);
543
544 while let Some(curr_char) = self.peek() {
545 match curr_char {
546 b'"' => {
547 self.next();
548 return match output {
549 Cow::Borrowed(output) => match std::str::from_utf8(output) {
550 Ok(output) => Ok(Cow::Borrowed(output)),
551 Err(err) => Err(error::Repr::InvalidUtf8InDisplayString(
552 start + err.valid_up_to(),
553 )),
554 },
555 Cow::Owned(output) => match StdString::from_utf8(output) {
556 Ok(output) => Ok(Cow::Owned(output)),
557 Err(err) => Err(error::Repr::InvalidUtf8InDisplayString(
558 start + err.utf8_error().valid_up_to(),
559 )),
560 },
561 };
562 }
563 0x00..=0x1f | 0x7f..=0xff => {
564 return Err(error::Repr::InvalidDisplayStringCharacter(self.index));
565 }
566 b'%' => {
567 self.next();
568
569 let mut octet = 0;
570
571 for _ in 0..2 {
572 octet = (octet << 4)
573 + match self.peek() {
574 Some(c @ b'0'..=b'9') => {
575 self.next();
576 c - b'0'
577 }
578 Some(c @ b'a'..=b'f') => {
579 self.next();
580 c - b'a' + 10
581 }
582 None => {
583 return Err(error::Repr::UnterminatedEscapeSequence(self.index))
584 }
585 Some(_) => {
586 return Err(error::Repr::InvalidEscapeSequence(self.index))
587 }
588 };
589 }
590
591 output.to_mut().push(octet);
592 }
593 _ => {
594 self.next();
595 match output {
596 Cow::Borrowed(ref mut output) => *output = &self.input[start..self.index],
597 Cow::Owned(ref mut output) => output.push(curr_char),
598 }
599 }
600 }
601 }
602 Err(error::Repr::UnterminatedDisplayString(self.index))
603 }
604
605 pub(crate) fn parse_parameters(
606 &mut self,
607 mut visitor: impl ParameterVisitor<'de>,
608 ) -> Result<(), error::Repr> {
609 while let Some(b';') = self.peek() {
612 self.next();
613 self.consume_sp_chars();
614
615 let param_name = self.parse_key()?;
616 let param_value = match self.peek() {
617 Some(b'=') => {
618 self.next();
619 self.parse_bare_item()?
620 }
621 _ => BareItemFromInput::Boolean(true),
622 };
623 visitor.parameter(param_name, param_value)?;
625 }
626
627 visitor.finish()?;
628 Ok(())
629 }
630
631 pub(crate) fn parse_key(&mut self) -> Result<&'de KeyRef, error::Repr> {
632 match self.parse_non_empty_str(
635 utils::is_allowed_start_key_char,
636 utils::is_allowed_inner_key_char,
637 ) {
638 None => Err(error::Repr::ExpectedStartOfKey(self.index)),
639 Some(str) => Ok(KeyRef::from_validated_str(str)),
640 }
641 }
642
643 fn consume_ows_chars(&mut self) {
644 while let Some(b' ' | b'\t') = self.peek() {
645 self.next();
646 }
647 }
648
649 fn consume_sp_chars(&mut self) {
650 while let Some(b' ') = self.peek() {
651 self.next();
652 }
653 }
654
655 #[cfg(test)]
656 pub(crate) fn remaining(&self) -> &[u8] {
657 &self.input[self.index..]
658 }
659}