1use core::fmt;
2use std::borrow::Cow;
3
4use super::{
5 arena::Arena,
6 ast::{
7 ArgList, Ast, AstRef, Element, Filter, FilterCall, FilterList, FilterSelect, Inline, Leaf,
8 Qualifier, RValue, Selector, SelectorCombinator, SelectorList, Statement, StatementList,
9 },
10 scanner::{Lexeme, Scanner, Span, Token},
11};
12
/// Recursive-descent parser state: a token source plus the arena that
/// receives the list-shaped AST nodes created while parsing.
#[derive(Debug)]
pub struct Parser<'a> {
    /// Token source constructed over the input string.
    scanner: Scanner<'a>,
    /// Arena into which statement, selector, filter and argument list nodes
    /// are inserted; it is handed back to the caller by `parse`.
    arena: Arena<Ast<'a>>,
}
18
/// Errors reported by the parser.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum ParseError {
    /// A token was found that the grammar does not allow at that position.
    UnexpectedToken {
        /// Token kinds that would have been accepted.
        expected: Vec<Token>,
        /// Kind of the token that was actually found.
        got: Token,
        /// Owned copy of the offending lexeme's text.
        value: String,
        /// Location of the offending lexeme; its `line` appears in the
        /// `Display` output.
        span: Span,
    },
}
29
30impl fmt::Display for ParseError {
31 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32 match self {
33 Self::UnexpectedToken {
34 expected,
35 got,
36 span,
37 value,
38 } => {
39 write!(
40 f,
41 "Expected one of {expected:?}, got {got:?} '{value}' on line {}",
42 span.line
43 )
44 }
45 }
46 }
47}
48
// Marker impl so `ParseError` can be used wherever a `std::error::Error` is
// expected; the trait's default methods are used unchanged.
impl std::error::Error for ParseError {}
50
51impl ParseError {
52 pub fn unexpected(expected: Vec<Token>, lx: Lexeme<'_>, span: Span) -> Self {
55 Self::UnexpectedToken {
56 expected,
57 got: lx.token,
58 value: lx.value.to_owned(),
59 span,
60 }
61 }
62}
63
/// Result alias used throughout this module; the error type is always
/// [`ParseError`].
type Result<T> = std::result::Result<T, ParseError>;
65
66impl<'a> Parser<'a> {
    /// Creates a parser over `input`, pairing a scanner built from the input
    /// with a newly created arena.
    #[must_use]
    pub const fn new(input: &'a str) -> Self {
        Self {
            scanner: Scanner::new(input),
            arena: Arena::new(),
        }
    }
74
75 pub fn parse(mut self) -> Result<(Arena<Ast<'a>>, Option<AstRef<'a, StatementList<'a>>>)> {
76 let r = match self.parse_statement_list() {
77 Ok(r) => r,
78 Err(e) => {
79 return Err(e);
80 }
81 };
82 self.try_eat(Token::Eof)?;
83 Ok((self.arena, r))
84 }
85
86 pub fn parse_statement_list(&mut self) -> Result<Option<AstRef<'a, StatementList<'a>>>> {
87 let (_, lx) = self.scanner.peek_non_whitespace();
88
89 if lx.token == Token::Id {
90 let statement = self.parse_statement()?;
91 let next = self.parse_statement_list()?;
92
93 Ok(Some(
94 self.arena
95 .insert_variant(StatementList::new(statement, next)),
96 ))
97 } else {
98 Ok(None)
99 }
100 }
101
102 fn parse_statement(&mut self) -> Result<Statement<'a>> {
103 let id = self.try_eat(Token::Id)?.value;
104 self.try_eat(Token::Colon)?;
105 let value = self.parse_rvalue()?;
106 let filters = self.parse_filter_list()?;
107 self.try_eat(Token::Semi)?;
108 Ok(Statement { id, value, filters })
109 }
110
111 fn parse_rvalue(&mut self) -> Result<RValue<'a>> {
112 let (_, lx) = self.scanner.peek_non_whitespace();
113
114 match lx.token {
115 Token::Id | Token::Less | Token::Dot | Token::Hash => {
116 self.parse_element().map(RValue::Element)
117 }
118 _ => self.parse_leaf().map(RValue::Leaf),
119 }
120 }
121
122 fn parse_leaf(&mut self) -> Result<Leaf<'a>> {
123 self.scanner.peek_non_whitespace();
124 let (span, lx) = self.scanner.eat_token();
125 match lx.token {
126 Token::String => Ok(Leaf::String(parse_string_literal(lx.value))),
127 Token::Float => Ok(Leaf::Float(
128 lx.value.parse().expect("float literal invalid"),
129 )),
130 Token::Int => Ok(Leaf::Int(lx.value.parse().expect("int literal invalid"))),
131 Token::Dollar => {
132 let id = self.try_eat(Token::Id)?.value;
133 Ok(Leaf::Var(id))
134 }
135 _ => Err(ParseError::unexpected(
136 vec![Token::String, Token::Float, Token::Int, Token::Dollar],
137 lx,
138 span,
139 )),
140 }
141 }
142
143 #[inline]
144 fn try_eat(&mut self, tk: Token) -> Result<Lexeme<'a>> {
145 let (span, lx) = self.scanner.peek_non_whitespace();
146 self.scanner.eat_token();
147
148 if lx.token == tk {
149 Ok(lx)
150 } else {
151 Err(ParseError::unexpected(vec![tk], lx, span))
152 }
153 }
154
155 fn parse_element(&mut self) -> Result<Element<'a>> {
156 let url = self.parse_maybe_url()?;
157 let selector_head = self.parse_selector()?;
158 let selectors = self.parse_selector_list()?;
159
160 self.try_eat(Token::BraceOpen)?;
161
162 let statements = self.parse_statement_list()?;
163
164 self.try_eat(Token::BraceClose)?;
165
166 let qualifier = self.parse_qualifier()?;
167
168 Ok(Element {
169 url,
170 selector_head,
171 selectors,
172 qualifier,
173 statements,
174 })
175 }
176
177 fn parse_maybe_url(&mut self) -> Result<Option<Inline<'a>>> {
178 let (_, lx) = self.scanner.peek_non_whitespace();
179 if lx.token == Token::Less {
180 self.parse_inline().map(Some)
181 } else {
182 Ok(None)
183 }
184 }
185
186 fn parse_inline(&mut self) -> Result<Inline<'a>> {
187 self.try_eat(Token::Less)?;
188 let value = self.parse_leaf()?;
189 let filters = self.parse_filter_list()?;
190 self.try_eat(Token::Greater)?;
191 Ok(Inline { value, filters })
192 }
193
194 fn parse_value(&mut self) -> Result<Inline<'a>> {
195 let (span, lx) = self.scanner.peek_non_whitespace();
196 match lx.token {
197 Token::Less => self.parse_inline(),
198 Token::Dollar | Token::Int | Token::Float | Token::String => {
199 self.parse_leaf().map(|value| Inline {
200 value,
201 filters: None,
202 })
203 }
204 _ => Err(ParseError::unexpected(
205 vec![
206 Token::Less,
207 Token::Dollar,
208 Token::Int,
209 Token::Float,
210 Token::String,
211 ],
212 lx,
213 span,
214 )),
215 }
216 }
217
    /// Parses the selectors that follow the head selector, producing a linked
    /// list of combinator/selector pairs stored in the arena.
    ///
    /// Returns `None` when the next significant token is `{` or `(`, which
    /// ends the selector list. Otherwise one combinator and its selector are
    /// parsed and the function recurses for the remainder.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::UnexpectedToken`] when the token is neither a
    /// terminator, a combinator nor the start of a selector, or propagates
    /// the error from parsing the selector itself.
    fn parse_selector_list(&mut self) -> Result<Option<AstRef<'a, SelectorList<'a>>>> {
        // Whitespace is significant here, so the first peek must be able to
        // return a `Whitespace` token.
        // NOTE(review): presumably `peek_non_comment` skips comments only —
        // confirm against `Scanner`.
        let mut item = self.scanner.peek_non_comment();
        if item.1.token == Token::Whitespace {
            // Consume the whitespace, then look at what follows it to decide
            // whether the whitespace itself was the combinator.
            self.scanner.eat_token();
            let next = self.scanner.peek_non_whitespace();
            match next.1.token {
                // Whitespace directly followed by a selector start: keep
                // `item` as `Whitespace` so it is treated as a descendant
                // combinator below.
                Token::Id | Token::Hash | Token::Dot | Token::Star => (),
                // Otherwise the whitespace was only padding; dispatch on the
                // token after it instead.
                _ => item = next,
            };
        }

        let (span, lx) = item;

        let sel = match lx.token {
            // Start of the element body (or an opening parenthesis) ends the
            // list; the token is left for the caller.
            Token::BraceOpen | Token::ParenOpen => return Ok(None),
            // The whitespace token was already consumed above.
            Token::Whitespace => SelectorCombinator::Descendent(self.parse_selector()?),
            Token::Greater => {
                self.scanner.eat_token();
                SelectorCombinator::Child(self.parse_selector()?)
            }
            Token::Plus => {
                self.scanner.eat_token();
                SelectorCombinator::NextSibling(self.parse_selector()?)
            }
            Token::Tilde => {
                self.scanner.eat_token();
                SelectorCombinator::SubsequentSibling(self.parse_selector()?)
            }
            // A selector start with no preceding whitespace or combinator is
            // combined with the previous selector; nothing is consumed here
            // because `parse_selector` reads the token itself.
            Token::Hash | Token::Dot | Token::Id | Token::Star => {
                SelectorCombinator::And(self.parse_selector()?)
            }
            _ => {
                return Err(ParseError::unexpected(
                    vec![
                        Token::Whitespace,
                        Token::Greater,
                        Token::Plus,
                        Token::Tilde,
                        Token::Hash,
                        Token::Dot,
                        Token::Id,
                        Token::Star,
                    ],
                    lx,
                    span,
                ))
            }
        };

        // Parse the tail first, then insert the node for this selector so it
        // can point at the tail.
        let itm = SelectorList::new(sel, self.parse_selector_list()?);

        Ok(Some(self.arena.insert_variant(itm)))
    }
275
276 fn parse_selector(&mut self) -> Result<Selector<'a>> {
277 let (span, lx) = self.scanner.peek_non_whitespace();
278 match lx.token {
279 Token::Dot => {
280 self.scanner.eat_token();
281 self.try_eat(Token::Id).map(|lx| Selector::Class(lx.value))
282 }
283 Token::Hash => {
284 self.scanner.eat_token();
285 self.try_eat(Token::Id).map(|lx| Selector::Id(lx.value))
286 }
287 Token::Id => {
288 self.scanner.eat_token();
289 Ok(Selector::Tag(lx.value))
290 }
291 Token::Star => {
292 self.scanner.eat_token();
293 Ok(Selector::Any)
294 }
295 _ => Err(ParseError::unexpected(
296 vec![Token::Dot, Token::Hash, Token::Id, Token::Star],
297 lx,
298 span,
299 )),
300 }
301 }
302
303 fn parse_filter_list(&mut self) -> Result<Option<AstRef<'a, FilterList<'a>>>> {
304 let (_, lx) = self.scanner.peek_non_whitespace();
305 if lx.token == Token::Pipe {
306 self.scanner.eat_token();
307 let filter = self.parse_filter()?;
308 let next = self.parse_filter_list()?;
309 let qualifier = self.parse_qualifier()?;
310 let r = self
311 .arena
312 .insert_variant(FilterList::new(filter, qualifier, next));
313 Ok(Some(r))
314 } else {
315 Ok(None)
316 }
317 }
318
319 fn parse_filter(&mut self) -> Result<Filter<'a>> {
320 let (span, lx) = self.scanner.peek_non_whitespace();
321 self.scanner.eat_token();
322
323 match lx.token {
324 Token::Id => {
325 let id = lx.value;
326 self.try_eat(Token::ParenOpen)?;
327 let args = self.parse_arg_list()?;
328 self.try_eat(Token::ParenClose)?;
329 Ok(Filter::Call(FilterCall::new(id, args)))
330 }
331 Token::BracketOpen => {
332 let name = self.try_eat(Token::Id)?.value;
333 self.try_eat(Token::Colon)?;
334 let leaf = self.parse_leaf()?;
335 let filters = self.parse_filter_list()?;
336 self.try_eat(Token::BracketClose)?;
337 Ok(Filter::Select(FilterSelect::new(
338 name,
339 Inline {
340 value: leaf,
341 filters,
342 },
343 )))
344 }
345 _ => Err(ParseError::unexpected(
346 vec![Token::Id, Token::BracketOpen],
347 lx,
348 span,
349 )),
350 }
351 }
352
353 fn parse_arg_list(&mut self) -> Result<Option<AstRef<'a, ArgList<'a>>>> {
354 let (span, lx) = self.scanner.peek_non_whitespace();
355 match lx.token {
356 Token::ParenClose => Ok(None),
357 Token::Id => {
358 let id = lx.value;
359 self.scanner.eat_token();
360 self.try_eat(Token::Colon)?;
361 let value = self.parse_value()?;
362 let next = match self.scanner.peek_non_whitespace().1.token {
363 Token::Comma => {
364 self.scanner.eat_token();
365 self.parse_arg_list()?
366 }
367 _ => None,
368 };
369
370 let r = self.arena.insert_variant(ArgList::new(id, value, next));
371 Ok(Some(r))
372 }
373 _ => Err(ParseError::unexpected(
374 vec![Token::ParenClose, Token::Id],
375 lx,
376 span,
377 )),
378 }
379 }
380
381 fn parse_qualifier(&mut self) -> Result<Qualifier> {
382 let (_, lx) = self.scanner.peek_non_whitespace();
383 Ok(match lx.token {
384 Token::Question => {
385 self.scanner.eat_token();
386 Qualifier::Optional
387 }
388 Token::Star => {
389 self.scanner.eat_token();
390 Qualifier::Collection
391 }
392 _ => Qualifier::One,
393 })
394 }
395}
396
/// Strips the surrounding double quotes from a string literal and resolves
/// backslash escapes.
///
/// `s` must include both quote characters. Recognised escapes are `\n`, `\\`
/// and `\"`; any other escaped character is kept as-is after a warning is
/// printed to stderr. A literal without any backslash is returned as a
/// borrowed slice of the input; otherwise a new string is allocated.
fn parse_string_literal(s: &str) -> Cow<'_, str> {
    debug_assert!(s.len() >= 2 && s.starts_with('"') && s.ends_with('"'));
    let body = &s[1..s.len() - 1];

    // No backslash means nothing to rewrite, so the input can be borrowed.
    let Some(first_escape) = body.find('\\') else {
        return Cow::Borrowed(body);
    };

    // Everything before the first backslash is copied verbatim.
    let mut unescaped = String::with_capacity(body.len());
    unescaped.push_str(&body[..first_escape]);

    let mut rest = body[first_escape..].chars();
    while let Some(current) = rest.next() {
        if current != '\\' {
            unescaped.push(current);
            continue;
        }

        // The backslash itself is dropped; the character after it decides
        // what is emitted. A backslash at the very end emits nothing.
        if let Some(escaped) = rest.next() {
            let resolved = match escaped {
                'n' => '\n',
                '\\' => '\\',
                '"' => '"',
                other => {
                    eprintln!("Unknown escape character {other:?}");
                    other
                }
            };
            unescaped.push(resolved);
        }
    }

    Cow::Owned(unescaped)
}
448
449#[cfg(test)]
450mod tests {
451 use std::borrow::Cow;
452
453 use super::{parse_string_literal, Parser};
454 use crate::frontend::ast::*;
455
456 fn fmt_selector<'a>(head: &Selector<'a>, list: &[&SelectorList<'a>]) -> String {
457 use std::fmt::Write as _;
458 let mut out = String::new();
459 write!(&mut out, "{head}").expect("fmt error");
460 for node in list {
461 let _ = match &node.sel {
462 SelectorCombinator::And(s) => write!(&mut out, "{s}"),
463 SelectorCombinator::Child(s) => write!(&mut out, " > {s}"),
464 SelectorCombinator::Descendent(s) => write!(&mut out, " {s}"),
465 SelectorCombinator::NextSibling(s) => write!(&mut out, " + {s}"),
466 SelectorCombinator::SubsequentSibling(s) => write!(&mut out, " ~ {s}"),
467 };
468 }
469
470 out
471 }
472
    // End-to-end check: parses a small program with a nested element, a
    // filter chain with a trailing comma in the argument list, and a
    // compound selector, then inspects the resulting AST.
    #[test]
    fn test_parse() {
        let string = r#"a: h1 {
 x: $me | cat(i: "x", ) | meow();

 y: h2#x > .cat {

 };
 };"#;
        let parser = Parser::new(&string);
        let (arena, r) = parser.parse().expect("parsing failed");

        // Top level: a single statement `a` whose value is an element.
        let stmts = arena.flatten(r);
        let stmt = &stmts[0].value;

        assert_eq!(stmt.id, "a");
        let RValue::Element(element) = &stmt.value else {
            panic!("expected element");
        };

        assert_eq!(
            fmt_selector(&element.selector_head, &arena.flatten(element.selectors)),
            "h1"
        );

        // No `?` or `*` follows the closing brace.
        assert_eq!(element.qualifier, Qualifier::One);
        let statements = arena.flatten(element.statements);

        // First nested statement: `x: $me | cat(...) | meow();`
        let stmt = &statements[0].value;

        assert!(
            matches!(
                stmt,
                Statement {
                    id: "x",
                    value: RValue::Leaf(Leaf::Var("me")),
                    ..
                }
            ),
            "found {stmt:?}",
        );

        // Both filters must be present, in source order.
        let filters = arena.flatten(stmt.filters);
        assert!(
            matches!(
                &filters[..],
                [
                    FilterList {
                        filter: Filter::Call(FilterCall { id: "cat", .. }),
                        ..
                    },
                    FilterList {
                        filter: Filter::Call(FilterCall { id: "meow", .. }),
                        ..
                    }
                ]
            ),
            "found {filters:?}"
        );

        // `cat` has exactly one argument despite the trailing comma, and its
        // string value is borrowed because it contains no escapes.
        let Filter::Call(filter) = &filters[0].filter else {
            unreachable!("Validated as Filter::Call above");
        };
        let args = arena.flatten(filter.args);
        assert!(
            matches!(
                &args[..],
                [ArgList {
                    id: "i",
                    value: Inline {
                        value: Leaf::String(Cow::Borrowed("x")),
                        filters: None,
                    },
                    ..
                }]
            ),
            "found {:?}",
            &args[..]
        );

        // Second nested statement: an element with an empty body and a
        // compound selector using `And` and `Child` combinators.
        let stmt = &statements[1].value;

        let RValue::Element(element) = &stmt.value else {
            panic!("Expected element");
        };

        assert!(element.statements.is_none());
        assert_eq!(
            fmt_selector(&element.selector_head, &arena.flatten(element.selectors)),
            "h2#x > .cat"
        );
    }
565
566 #[test]
567 fn test_escape_strings() {
568 assert_eq!(parse_string_literal(r#""""#), "");
569 assert_eq!(parse_string_literal(r#""abcdef""#), "abcdef");
570 assert_eq!(parse_string_literal(r#""hello! \n""#), "hello! \n");
571 assert_eq!(
572 parse_string_literal(r#""my \" crazy \\ lifestyle \\\"""#),
573 r#"my " crazy \ lifestyle \""#
574 );
575 }
576}