// json_syntax/parse/string.rs

use super::{Context, Error, Parse, Parser};
use decoded_char::DecodedChar;
use locspan::{Meta, Span};
use smallstr::SmallString;

/// Returns `true` when `c` lies in the range U+0000..=U+001F.
///
/// The string parser in this file refuses any unescaped character for which
/// this returns `true`. U+007F and the U+0080..=U+009F block are outside the
/// range and therefore yield `false`.
fn is_control(c: char) -> bool {
	matches!(c, '\u{0000}'..='\u{001f}')
}
10fn parse_hex4<C, E>(parser: &mut Parser<C, E>) -> Result<u32, Error<E>>
11where
12	C: Iterator<Item = Result<DecodedChar, E>>,
13{
14	match parser.next_char()? {
15		(p, Some(c)) => match c.to_digit(16) {
16			Some(h3) => match parser.next_char()? {
17				(p, Some(c)) => match c.to_digit(16) {
18					Some(h2) => match parser.next_char()? {
19						(p, Some(c)) => match c.to_digit(16) {
20							Some(h1) => match parser.next_char()? {
21								(p, Some(c)) => match c.to_digit(16) {
22									Some(h0) => Ok(h3 << 12 | h2 << 8 | h1 << 4 | h0),
23									None => Err(Error::unexpected(p, Some(c))),
24								},
25								(p, unexpected) => Err(Error::unexpected(p, unexpected)),
26							},
27							None => Err(Error::unexpected(p, Some(c))),
28						},
29						(p, unexpected) => Err(Error::unexpected(p, unexpected)),
30					},
31					None => Err(Error::unexpected(p, Some(c))),
32				},
33				(p, unexpected) => Err(Error::unexpected(p, unexpected)),
34			},
35			None => Err(Error::unexpected(p, Some(c))),
36		},
37		(p, unexpected) => Err(Error::unexpected(p, unexpected)),
38	}
39}
41impl<A: smallvec::Array<Item = u8>> Parse for SmallString<A> {
42	fn parse_in<C, E>(
43		parser: &mut Parser<C, E>,
44		_context: Context,
45	) -> Result<Meta<Self, usize>, Error<E>>
46	where
47		C: Iterator<Item = Result<DecodedChar, E>>,
48	{
49		let i = parser.begin_fragment();
50		match parser.next_char()? {
51			(_, Some('"')) => {
52				let mut result = Self::new();
53				let mut high_surrogate: Option<(usize, u32)> = None;
54				loop {
55					let c = match parser.next_char()? {
56						(p, Some('"')) => {
57							if let Some((p_high, high)) = high_surrogate {
58								if parser.options.accept_truncated_surrogate_pair {
59									result.push('\u{fffd}');
60								} else {
61									break Err(Error::MissingLowSurrogate(
62										Span::new(p_high, p),
63										high as u16,
64									));
65								}
66							}
67
68							parser.end_fragment(i);
69							break Ok(Meta(result, i));
70						}
71						(_, Some('\\')) => match parser.next_char()? {
72							(_, Some(c @ ('"' | '\\' | '/'))) => c,
73							(_, Some('b')) => '\u{0008}',
74							(_, Some('t')) => '\u{0009}',
75							(_, Some('n')) => '\u{000a}',
76							(_, Some('f')) => '\u{000c}',
77							(_, Some('r')) => '\u{000d}',
78							(p, Some('u')) => {
79								let codepoint = parse_hex4(parser)?;
80
81								match high_surrogate.take() {
82									Some((p_high, high)) => {
83										if (0xdc00..=0xdfff).contains(&codepoint) {
84											let low = codepoint;
85											let codepoint =
86												((high - 0xd800) << 10 | (low - 0xdc00)) + 0x010000;
87											match char::from_u32(codepoint) {
88												Some(c) => c,
89												None => {
90													if parser.options.accept_invalid_codepoints {
91														'\u{fffd}'
92													} else {
93														break Err(Error::InvalidUnicodeCodePoint(
94															Span::new(p_high, parser.position),
95															codepoint,
96														));
97													}
98												}
99											}
100										} else if parser.options.accept_truncated_surrogate_pair {
101											result.push('\u{fffd}');
102
103											match char::from_u32(codepoint) {
104												Some(c) => c,
105												None => {
106													if parser.options.accept_invalid_codepoints {
107														'\u{fffd}'
108													} else {
109														break Err(Error::InvalidUnicodeCodePoint(
110															Span::new(p, parser.position),
111															codepoint,
112														));
113													}
114												}
115											}
116										} else {
117											break Err(Error::InvalidLowSurrogate(
118												Span::new(p, parser.position),
119												high as u16,
120												codepoint,
121											));
122										}
123									}
124									None => {
125										if (0xd800..=0xdbff).contains(&codepoint) {
126											high_surrogate = Some((p, codepoint));
127											continue;
128										} else {
129											match char::from_u32(codepoint) {
130												Some(c) => c,
131												None => {
132													if parser.options.accept_invalid_codepoints {
133														'\u{fffd}'
134													} else {
135														break Err(Error::InvalidUnicodeCodePoint(
136															Span::new(p, parser.position),
137															codepoint,
138														));
139													}
140												}
141											}
142										}
143									}
144								}
145							}
146							(p, unexpected) => break Err(Error::unexpected(p, unexpected)),
147						},
148						(_, Some(c)) if !is_control(c) => c,
149						(p, unexpected) => break Err(Error::unexpected(p, unexpected)),
150					};
151
152					if let Some((p_high, high)) = high_surrogate.take() {
153						if parser.options.accept_truncated_surrogate_pair {
154							result.push('\u{fffd}');
155						} else {
156							break Err(Error::MissingLowSurrogate(
157								Span::new(p_high, parser.position),
158								high as u16,
159							));
160						}
161					}
162
163					result.push(c);
164				}
165			}
166			(p, unexpected) => Err(Error::unexpected(p, unexpected)),
167		}
168	}
169}