Skip to main content

oni_comb_parser/text/
quoted_string.rs

1use alloc::borrow::Cow;
2use alloc::string::String;
3
4use crate::error::ParseError;
5use crate::fail::{Fail, PResult};
6use crate::input::Input;
7use crate::parser::Parser;
8use crate::str_input::StrInput;
9
/// Parser for a double-quoted string literal.
///
/// A string that contains no escape sequence is returned zero-copy as a
/// borrowed `&'a str`; only when an escape sequence is present does the
/// parser fall back to building an owned `String`.
pub struct QuotedString;
13
14pub fn quoted_string() -> QuotedString {
15  QuotedString
16}
17
18impl<'a> Parser<StrInput<'a>> for QuotedString {
19  type Error = ParseError;
20  type Output = Cow<'a, str>;
21
22  #[inline]
23  fn parse_next(&mut self, input: &mut StrInput<'a>) -> PResult<Cow<'a, str>, ParseError> {
24    let pos = input.offset();
25    let remaining = input.as_str();
26    let bytes = remaining.as_bytes();
27
28    if bytes.is_empty() || bytes[0] != b'"' {
29      return Err(Fail::Backtrack(ParseError::expected_char(pos, '"')));
30    }
31
32    // Fast path: scan for closing quote without escape
33    let mut i = 1; // skip opening quote
34    loop {
35      if i >= bytes.len() {
36        return Err(Fail::Cut(ParseError::expected_char(pos + i, '"')));
37      }
38      match bytes[i] {
39        b'"' => {
40          let s = &remaining[1..i];
41          input.advance(i + 1);
42          return Ok(Cow::Borrowed(s));
43        }
44        b'\\' => break,
45        _ => i += 1,
46      }
47    }
48
49    // Slow path: build String, reusing the prefix before the first escape
50    let mut result = String::with_capacity(i + 16);
51    result.push_str(&remaining[1..i]);
52
53    let mut chars = remaining[i..].chars();
54    let mut consumed = i; // bytes consumed so far (including opening quote)
55
56    loop {
57      match chars.next() {
58        Some('"') => {
59          consumed += 1;
60          input.advance(consumed);
61          return Ok(Cow::Owned(result));
62        }
63        Some('\\') => {
64          consumed += 1;
65          match chars.next() {
66            Some('"') => {
67              consumed += 1;
68              result.push('"');
69            }
70            Some('\\') => {
71              consumed += 1;
72              result.push('\\');
73            }
74            Some('/') => {
75              consumed += 1;
76              result.push('/');
77            }
78            Some('b') => {
79              consumed += 1;
80              result.push('\u{0008}');
81            }
82            Some('f') => {
83              consumed += 1;
84              result.push('\u{000C}');
85            }
86            Some('n') => {
87              consumed += 1;
88              result.push('\n');
89            }
90            Some('r') => {
91              consumed += 1;
92              result.push('\r');
93            }
94            Some('t') => {
95              consumed += 1;
96              result.push('\t');
97            }
98            Some('u') => {
99              consumed += 1;
100              let mut code: u32 = 0;
101              for _ in 0..4 {
102                match chars.next() {
103                  Some(c) if c.is_ascii_hexdigit() => {
104                    consumed += 1;
105                    code = code * 16 + c.to_digit(16).unwrap();
106                  }
107                  _ => {
108                    return Err(Fail::Cut(ParseError::expected_description(
109                      pos + consumed,
110                      "4 hex digits after \\u",
111                    )));
112                  }
113                }
114              }
115              match char::from_u32(code) {
116                Some(c) => result.push(c),
117                None => {
118                  return Err(Fail::Cut(ParseError::expected_description(
119                    pos + consumed - 4,
120                    "valid unicode code point",
121                  )));
122                }
123              }
124            }
125            Some(_) => {
126              return Err(Fail::Cut(ParseError::expected_description(
127                pos + consumed,
128                "valid escape sequence",
129              )));
130            }
131            None => {
132              return Err(Fail::Cut(ParseError::expected_description(
133                pos + consumed,
134                "escape character after '\\'",
135              )));
136            }
137          }
138        }
139        Some(c) => {
140          consumed += c.len_utf8();
141          result.push(c);
142        }
143        None => {
144          return Err(Fail::Cut(ParseError::expected_char(pos + consumed, '"')));
145        }
146      }
147    }
148  }
149}