1use crate::is_atom_chr;
2use crate::is_atom_string_chr;
3
/// One element of the token-level view of a parsed document, as returned by
/// `Parser::next_item`.
///
/// Every variant carries a `usize`: the byte offset in the input at which
/// the corresponding token starts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsedItem<'a> {
    /// An atom, borrowed directly from the input, and its byte offset.
    Atom(&'a str, usize),

    /// The `(` that opens a list, identified by its byte offset.
    ListStart(usize),

    /// The `)` that closes a list, identified by its byte offset.
    ListEnd(usize),
}
19
/// The reasons parsing can fail.
///
/// Every variant records `pos`, the byte offset in the input at which the
/// problem was detected. Variants about a specific character also carry
/// that character in `chr`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// Outside of strings and comments, a character was found that is not
    /// whitespace, `;`, a parenthesis, `"` or a valid atom character.
    IllegalChr { pos: usize, chr: char },

    /// A character that is not allowed inside a quoted string (either
    /// directly or after a backslash).
    IllegalChrInString { pos: usize, chr: char },

    /// A character inside a `;` comment that is neither a tab, a printable
    /// ASCII character (`' '..='~'`) nor a line ending.
    IllegalChrInComment { pos: usize, chr: char },

    /// The input ended while a quoted string was still open.
    UnfinishedString { pos: usize },

    /// The input ended before the top-level value was complete.
    UnexpectedEof { pos: usize },

    /// A `)` appeared where the top-level value was expected to start.
    UnexpectedRightParen { pos: usize },

    /// Another token followed the complete top-level value.
    ExpectedEof { pos: usize },
}
44
45impl core::fmt::Display for ParseError {
46 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
47 match *self {
48 ParseError::IllegalChr { pos, chr } => {
49 write!(f, "illegal character {:?} at byte {}", chr, pos)
50 }
51 ParseError::IllegalChrInString { pos, chr } => {
52 write!(f, "illegal character {:?} in string at byte {}", chr, pos)
53 }
54 ParseError::IllegalChrInComment { pos, chr } => {
55 write!(f, "illegal character {:?} in comment at byte {}", chr, pos)
56 }
57 ParseError::UnfinishedString { pos } => write!(f, "unfinished string at byte {}", pos),
58 ParseError::UnexpectedEof { pos } => {
59 write!(f, "unexpected end-of-file at byte {}", pos)
60 }
61 ParseError::UnexpectedRightParen { pos } => {
62 write!(f, "unexpected `)` at byte {}", pos)
63 }
64 ParseError::ExpectedEof { pos } => write!(f, "expected end-of-file at byte {}", pos),
65 }
66 }
67}
68
/// Lets [`ParseError`] be used as a standard error value.
///
/// Only compiled when the `std` feature is enabled. The impl adds nothing
/// beyond the trait's default methods.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
71
72pub struct Parser<'a> {
93 lexer: Lexer<'a>,
94 state: State,
95}
96
/// Progress of a `Parser` through the single top-level value it reads.
enum State {
    /// No token of the top-level value has been consumed yet.
    Beginning,

    /// The parser is inside the top-level list.
    ///
    /// `depth` counts the lists opened inside the top-level list that are
    /// still open; it is `0` directly inside the top-level list.
    Parsing { depth: usize },

    /// The top-level value has been read completely; only end-of-input is
    /// acceptable from here on.
    Finishing,
}
102
103impl<'a> Parser<'a> {
104 pub fn new(data: &'a str) -> Self {
105 Self {
106 lexer: Lexer::new(data),
107 state: State::Beginning,
108 }
109 }
110
111 pub fn next_item(&mut self) -> Result<ParsedItem<'a>, ParseError> {
112 match self.state {
113 State::Beginning => {
114 let (pos, token) = self.lexer.get_token()?;
115 match token {
116 Token::Eof => Err(ParseError::UnexpectedEof { pos }),
117 Token::LeftParen => {
118 self.state = State::Parsing { depth: 0 };
119 Ok(ParsedItem::ListStart(pos))
120 }
121 Token::RightParen => Err(ParseError::UnexpectedRightParen { pos }),
122 Token::Atom(atom) => {
123 self.state = State::Finishing;
124 Ok(ParsedItem::Atom(atom, pos))
125 }
126 }
127 }
128 State::Parsing { ref mut depth } => {
129 let (pos, token) = self.lexer.get_token()?;
130 match token {
131 Token::Eof => Err(ParseError::UnexpectedEof { pos }),
132 Token::LeftParen => {
133 *depth += 1;
134 Ok(ParsedItem::ListStart(pos))
135 }
136 Token::RightParen => {
137 if *depth == 0 {
138 self.state = State::Finishing;
139 } else {
140 *depth -= 1;
141 }
142 Ok(ParsedItem::ListEnd(pos))
143 }
144 Token::Atom(atom) => Ok(ParsedItem::Atom(atom, pos)),
145 }
146 }
147 State::Finishing => panic!("parsing finished"),
148 }
149 }
150
151 pub fn finish(mut self) -> Result<(), ParseError> {
152 match self.state {
153 State::Finishing => {
154 let (pos, token) = self.lexer.get_token()?;
155 match token {
156 Token::Eof => Ok(()),
157 _ => Err(ParseError::ExpectedEof { pos }),
158 }
159 }
160 _ => panic!("parsing not finished yet"),
161 }
162 }
163}
164
/// A lexical token produced by `Lexer::get_token`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token<'a> {
    /// The end of the input was reached.
    Eof,

    /// A `(` character.
    LeftParen,

    /// A `)` character.
    RightParen,

    /// An atom, as the exact slice of the input it occupies (quotes and
    /// escape sequences included, not decoded).
    Atom(&'a str),
}
172
/// Splits the input into `Token`s while tracking the current byte offset.
struct Lexer<'a> {
    /// The part of the input that has not been consumed yet.
    rem_input: &'a str,
    /// Byte offset of `rem_input` within the original input.
    rem_offset: usize,
}
177
178impl<'a> Lexer<'a> {
179 fn new(input: &'a str) -> Self {
180 Lexer {
181 rem_input: input,
182 rem_offset: 0,
183 }
184 }
185
186 #[must_use]
187 #[inline]
188 fn eat_any_char(&mut self) -> Option<char> {
189 let mut iter = self.rem_input.chars();
190 if let Some(chr) = iter.next() {
191 let new_rem = iter.as_str();
192 self.rem_offset += self.rem_input.len() - new_rem.len();
193 self.rem_input = new_rem;
194 Some(chr)
195 } else {
196 None
197 }
198 }
199
200 #[must_use]
201 #[inline]
202 fn eat_char(&mut self, chr: char) -> bool {
203 if let Some(new_rem) = self.rem_input.strip_prefix(chr) {
204 self.rem_offset += self.rem_input.len() - new_rem.len();
205 self.rem_input = new_rem;
206 true
207 } else {
208 false
209 }
210 }
211
212 #[must_use]
213 #[inline]
214 fn eat_char_if(&mut self, pred: impl FnMut(char) -> bool) -> bool {
215 if let Some(new_rem) = self.rem_input.strip_prefix(pred) {
216 self.rem_offset += self.rem_input.len() - new_rem.len();
217 self.rem_input = new_rem;
218 true
219 } else {
220 false
221 }
222 }
223
224 fn get_token(&mut self) -> Result<(usize, Token<'a>), ParseError> {
225 loop {
226 let start_str = self.rem_input;
227 let chr_pos = self.rem_offset;
228 if self.eat_char(' ')
229 || self.eat_char('\t')
230 || self.eat_char('\n')
231 || self.eat_char('\r')
232 {
233 } else if self.eat_char(';') {
235 loop {
237 let chr_pos = self.rem_offset;
238 match self.eat_any_char() {
239 None => return Ok((self.rem_offset, Token::Eof)),
240 Some('\n' | '\r') => break,
241 Some('\t' | ' '..='~') => {}
242 Some(chr) => {
243 return Err(ParseError::IllegalChrInComment { chr, pos: chr_pos });
244 }
245 }
246 }
247 } else if self.eat_char('(') {
248 return Ok((chr_pos, Token::LeftParen));
249 } else if self.eat_char(')') {
250 return Ok((chr_pos, Token::RightParen));
251 } else if let Some(chr) = self.eat_any_char() {
252 if is_atom_chr(chr) || chr == '"' {
253 let begin_pos = chr_pos;
254 let end_pos = self.lex_atom(chr)?;
255 let atom = &start_str[..(end_pos - begin_pos)];
256 return Ok((begin_pos, Token::Atom(atom)));
257 } else {
258 return Err(ParseError::IllegalChr { chr, pos: chr_pos });
260 }
261 } else {
262 return Ok((self.rem_offset, Token::Eof));
264 }
265 }
266 }
267
268 fn lex_atom(&mut self, first_chr: char) -> Result<usize, ParseError> {
269 let mut in_string = first_chr == '"';
270 loop {
271 let chr_pos = self.rem_offset;
272 if in_string {
273 if self.eat_char('"') {
274 in_string = false;
275 } else if self.eat_char('\\') {
276 let chr_pos = self.rem_offset;
277 if let Some(chr) = self.eat_any_char() {
278 if chr != '"' && chr != '\\' && !is_atom_string_chr(chr) {
279 return Err(ParseError::IllegalChrInString { chr, pos: chr_pos });
280 }
281 } else {
282 return Err(ParseError::UnfinishedString { pos: chr_pos });
283 }
284 } else if let Some(chr) = self.eat_any_char() {
285 if !is_atom_string_chr(chr) {
286 return Err(ParseError::IllegalChrInString { chr, pos: chr_pos });
287 }
288 } else {
289 return Err(ParseError::UnfinishedString { pos: chr_pos });
290 }
291 } else if self.eat_char('"') {
292 in_string = true;
293 } else if !self.eat_char_if(is_atom_chr) {
294 return Ok(chr_pos);
295 }
296 }
297 }
298}