css3_selector/
parser.rs

1use crate::ast::*;
2use crate::token::*;
3use std::error::Error;
4use std::fmt;
5
/// Error describing why a token stream could not be parsed as a selector group.
///
/// Derives `Clone`/`PartialEq`/`Eq` in addition to `Debug` so that errors can be
/// stored, duplicated and compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError<'a> {
    /// Text of the token at which parsing failed; the parsers in this module use
    /// `""` when the token stream ended before the expected token.
    pub token: &'a str,
    /// Start position taken from the offending token's span; the parsers in this
    /// module use `0` when the token stream ended early.
    pub index: usize,
    /// Human-readable description of what was expected at that position.
    pub message: String,
}

impl<'a> fmt::Display for ParseError<'a> {
    /// Renders the error as `failed to parse @ <index> ('<token>'): <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "failed to parse @ {} ('{}'): {}",
            self.index, self.token, self.message
        )
    }
}

// No source error is wrapped, so the default `Error` methods are sufficient.
impl<'a> Error for ParseError<'a> {}
24
25struct Parser;
26
27trait Parse<'a, T: Node<'a>> {
28    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<T, ParseError<'a>>;
29
30    /// parse but destroy mutations to tokens if Err
31    fn parse_reset(tokens: &mut &[TokenSpan<'a>]) -> Result<T, ParseError<'a>> {
32        let mut new_tokens = *tokens;
33        let result = Self::parse(&mut new_tokens)?;
34        *tokens = new_tokens;
35        Ok(result)
36    }
37}
38
39pub fn parse<'a>(mut tokens: &[TokenSpan<'a>]) -> Result<SelectorsGroup<'a>, ParseError<'a>> {
40    Parser::parse(&mut tokens)
41}
42
43impl<'a> Parse<'a, SelectorsGroup<'a>> for Parser {
44    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<SelectorsGroup<'a>, ParseError<'a>> {
45        let mut selectors: Vec<Selector<'a>> = vec![Parser::parse(tokens)?];
46        while let Some((_, Token::COMMA)) = tokens.get(0) {
47            *tokens = &tokens[1..];
48            while let Some((_, Token::S)) = tokens.get(0) {
49                *tokens = &tokens[1..];
50            }
51            selectors.push(Parser::parse(tokens)?);
52        }
53
54        Ok(SelectorsGroup::<'a> { selectors })
55    }
56}
57
58impl<'a> Parse<'a, Selector<'a>> for Parser {
59    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<Selector<'a>, ParseError<'a>> {
60        let base: SelectorSequence<'a> = Parser::parse(tokens)?;
61        let mut modifiers: Vec<(Combinator, SelectorSequence<'a>)> = vec![];
62        loop {
63            let combinator = match tokens.get(0) {
64                Some((_, Token::PLUS)) => Combinator::Plus,
65                Some((_, Token::GREATER)) => Combinator::Greater,
66                Some((_, Token::TILDE)) => Combinator::Tilde,
67                Some((_, Token::S)) => Combinator::None,
68                _ => break,
69            };
70            *tokens = &tokens[1..];
71            while let Some((_, Token::S)) = tokens.get(0) {
72                *tokens = &tokens[1..];
73            }
74            let modifier: SelectorSequence<'a> = Parser::parse(tokens)?;
75            modifiers.push((combinator, modifier));
76        }
77
78        Ok(Selector::<'a> { base, modifiers })
79    }
80}
81
82impl<'a> Parse<'a, SelectorSequence<'a>> for Parser {
83    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<SelectorSequence<'a>, ParseError<'a>> {
84        let type_selector: Option<TypeSelector<'a>> = Parser::parse_reset(tokens).ok();
85        let mut attribute_selectors = vec![];
86        let last_error;
87        loop {
88            match Parser::parse(tokens) {
89                Ok(attribute_selector) => {
90                    attribute_selectors.push(attribute_selector);
91                }
92                Err(e) => {
93                    last_error = e;
94                    break;
95                }
96            }
97        }
98        if type_selector.is_none() && attribute_selectors.len() == 0 {
99            return Err(ParseError::<'a> {
100                index: tokens.get(0).map(|x| x.0.start).unwrap_or(0),
101                token: tokens.get(0).map(|x| x.0.value).unwrap_or_default(),
102                message: format!("expected at least 1 attribute selector to accompany the type selector, 0 found\n{:?}", last_error),
103            });
104        }
105        Ok(SelectorSequence::<'a> {
106            type_selector,
107            attribute_selectors,
108        })
109    }
110}
111
112impl<'a> Parse<'a, AttributeSelector<'a>> for Parser {
113    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<AttributeSelector<'a>, ParseError<'a>> {
114        match tokens.get(0) {
115            Some((_, Token::HASH(hash))) => {
116                *tokens = &tokens[1..];
117                Ok(AttributeSelector::Hash(hash))
118            }
119            Some((_, Token::DOT(class))) => {
120                *tokens = &tokens[1..];
121                Ok(AttributeSelector::Class(class))
122            }
123            Some((_, Token::LBRACK)) => Ok(AttributeSelector::Attribute(Parser::parse(tokens)?)),
124            Some((_, Token::COLON)) => Ok(AttributeSelector::Psuedo(Parser::parse(tokens)?)),
125            Some((_, Token::NOT)) => Ok(AttributeSelector::Negation(Parser::parse(tokens)?)),
126            Some((span, _token)) => Err(ParseError::<'a> {
127                index: span.start,
128                token: span.value,
129                message: "expected a '.', '#', '[', ':', or 'NOT', none found".to_string(),
130            }),
131            None => Err(ParseError::<'a> {
132                index: 0,
133                token: "",
134                message: "expected a '.', '#', '[', ':', or 'NOT', EOF found".to_string(),
135            }),
136        }
137    }
138}
139
140impl<'a> Parse<'a, Namespace<'a>> for Parser {
141    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<Namespace<'a>, ParseError<'a>> {
142        let matched = match tokens.get(0) {
143            Some((_, Token::IDENT(ident))) => {
144                *tokens = &tokens[1..];
145                Namespace::<'a>::Ident(ident)
146            }
147            Some((_, Token::STAR)) => {
148                *tokens = &tokens[1..];
149                Namespace::<'a>::All
150            }
151            Some((span, _token)) => {
152                return Err(ParseError::<'a> {
153                    index: span.start,
154                    token: span.value,
155                    message: "expected a '[', none found".to_string(),
156                })
157            }
158            None => {
159                return Err(ParseError::<'a> {
160                    index: 0,
161                    token: "",
162                    message: "expected a ']', EOF found".to_string(),
163                })
164            }
165        };
166        match tokens.get(0) {
167            Some((_, Token::PIPE)) => {
168                *tokens = &tokens[1..];
169            }
170            Some((span, _token)) => {
171                return Err(ParseError::<'a> {
172                    index: span.start,
173                    token: span.value,
174                    message: "expected a '[', none found".to_string(),
175                })
176            }
177            None => {
178                return Err(ParseError::<'a> {
179                    index: 0,
180                    token: "",
181                    message: "expected a ']', EOF found".to_string(),
182                })
183            }
184        };
185
186        Ok(matched)
187    }
188}
189
190impl<'a> Parse<'a, TypeSelector<'a>> for Parser {
191    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<TypeSelector<'a>, ParseError<'a>> {
192        let namespace: Option<Namespace<'a>> = Parser::parse_reset(tokens).ok();
193        match tokens.get(0) {
194            Some((_, Token::IDENT(ident))) => {
195                *tokens = &tokens[1..];
196                Ok(TypeSelector::<'a> {
197                    namespace,
198                    element_name: Some(ident),
199                })
200            }
201            Some((_, Token::STAR)) => {
202                *tokens = &tokens[1..];
203                Ok(TypeSelector::<'a> {
204                    namespace,
205                    element_name: None,
206                })
207            }
208            Some((span, _token)) => Err(ParseError::<'a> {
209                index: span.start,
210                token: span.value,
211                message: "expected a '*' or ident, none found".to_string(),
212            }),
213            None => Err(ParseError::<'a> {
214                index: 0,
215                token: "",
216                message: "expected a '*' or ident, EOF found".to_string(),
217            }),
218        }
219    }
220}
221
222impl<'a> Parse<'a, Attribute<'a>> for Parser {
223    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<Attribute<'a>, ParseError<'a>> {
224        match tokens.get(0) {
225            Some((_, Token::LBRACK)) => {
226                *tokens = &tokens[1..];
227            }
228            Some((span, _token)) => {
229                return Err(ParseError::<'a> {
230                    index: span.start,
231                    token: span.value,
232                    message: "expected a '[', none found".to_string(),
233                })
234            }
235            None => {
236                return Err(ParseError::<'a> {
237                    index: 0,
238                    token: "",
239                    message: "expected a '[', EOF found".to_string(),
240                })
241            }
242        }
243        while let Some((_, Token::S)) = tokens.get(0) {
244            *tokens = &tokens[1..];
245        }
246        let namespace: Option<Namespace<'a>> = Parser::parse_reset(tokens).ok();
247        let name = match tokens.get(0) {
248            Some((_, Token::IDENT(ident))) => {
249                *tokens = &tokens[1..];
250                Ok(ident)
251            }
252            Some((span, _token)) => Err(ParseError::<'a> {
253                index: span.start,
254                token: span.value,
255                message: "expected an ident, none found".to_string(),
256            }),
257            None => Err(ParseError::<'a> {
258                index: 0,
259                token: "",
260                message: "expected an ident, EOF found".to_string(),
261            }),
262        }?;
263        while let Some((_, Token::S)) = tokens.get(0) {
264            *tokens = &tokens[1..];
265        }
266        let matcher = match tokens.get(0) {
267            Some((_, Token::PREFIXMATCH)) => {
268                *tokens = &tokens[1..];
269                Ok(Matcher::Prefix)
270            }
271            Some((_, Token::SUFFIXMATCH)) => {
272                *tokens = &tokens[1..];
273                Ok(Matcher::Suffix)
274            }
275            Some((_, Token::SUBSTRINGMATCH)) => {
276                *tokens = &tokens[1..];
277                Ok(Matcher::Substring)
278            }
279            Some((_, Token::EQ)) => {
280                *tokens = &tokens[1..];
281                Ok(Matcher::Equal)
282            }
283            Some((_, Token::INCLUDES)) => {
284                *tokens = &tokens[1..];
285                Ok(Matcher::Includes)
286            }
287            Some((_, Token::DASHMATCH)) => {
288                *tokens = &tokens[1..];
289                Ok(Matcher::Dash)
290            }
291            Some((_, Token::RBRACK)) => {
292                *tokens = &tokens[1..];
293                return Ok(Attribute::<'a> {
294                    namespace,
295                    name,
296                    matcher: None,
297                    value: None,
298                });
299            }
300            Some((span, _token)) => Err(ParseError::<'a> {
301                index: span.start,
302                token: span.value,
303                message: "expected a '^=', '$=', '*=', '=', '~=', or '|=', none found".to_string(),
304            }),
305            None => Err(ParseError::<'a> {
306                index: 0,
307                token: "",
308                message: "expected a '^=', '$=', '*=', '=', '~=', or '|=', EOF found".to_string(),
309            }),
310        }?;
311        while let Some((_, Token::S)) = tokens.get(0) {
312            *tokens = &tokens[1..];
313        }
314        let value = match tokens.get(0) {
315            Some((_, Token::IDENT(value))) | Some((_, Token::STRING(value))) => {
316                *tokens = &tokens[1..];
317                Some(*value)
318            }
319            Some((span, _token)) => {
320                return Err(ParseError::<'a> {
321                    index: span.start,
322                    token: span.value,
323                    message: "expected an ident or string, none found".to_string(),
324                })
325            }
326            None => {
327                return Err(ParseError::<'a> {
328                    index: 0,
329                    token: "",
330                    message: "expected an ident or string, EOF found".to_string(),
331                })
332            }
333        };
334        while let Some((_, Token::S)) = tokens.get(0) {
335            *tokens = &tokens[1..];
336        }
337        match tokens.get(0) {
338            Some((_, Token::RBRACK)) => {
339                *tokens = &tokens[1..];
340            }
341            Some((span, _token)) => {
342                return Err(ParseError::<'a> {
343                    index: span.start,
344                    token: span.value,
345                    message: "expected a ']', none found".to_string(),
346                })
347            }
348            None => {
349                return Err(ParseError::<'a> {
350                    index: 0,
351                    token: "",
352                    message: "expected a ']', EOF found".to_string(),
353                })
354            }
355        }
356        Ok(Attribute::<'a> {
357            namespace,
358            name,
359            matcher: Some(matcher),
360            value,
361        })
362    }
363}
364
365impl<'a> Parse<'a, Psuedo<'a>> for Parser {
366    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<Psuedo<'a>, ParseError<'a>> {
367        match tokens.get(0) {
368            Some((_, Token::COLON)) => {
369                *tokens = &tokens[1..];
370            }
371            Some((span, _token)) => {
372                return Err(ParseError::<'a> {
373                    index: span.start,
374                    token: span.value,
375                    message: "expected a ':', none found".to_string(),
376                })
377            }
378            None => {
379                return Err(ParseError::<'a> {
380                    index: 0,
381                    token: "",
382                    message: "expected a ':', EOF found".to_string(),
383                })
384            }
385        };
386        let is_class_type = match tokens.get(0) {
387            Some((_, Token::COLON)) => {
388                *tokens = &tokens[1..];
389                false
390            }
391            _ => true,
392        };
393        match tokens.get(0) {
394            Some((_, Token::IDENT(ident))) => {
395                *tokens = &tokens[1..];
396                Ok(Psuedo::<'a> {
397                    is_class_type,
398                    name: ident,
399                    arg: None,
400                })
401            }
402            Some((_, Token::FUNCTION(ident))) => {
403                *tokens = &tokens[1..];
404                while let Some((_, Token::S)) = tokens.get(0) {
405                    *tokens = &tokens[1..];
406                }
407                let arg: Expression<'a> = Parser::parse(tokens)?;
408                match tokens.get(0) {
409                    Some((_, Token::RPAREN)) => {
410                        *tokens = &tokens[1..];
411                    }
412                    Some((span, _token)) => {
413                        return Err(ParseError::<'a> {
414                            index: span.start,
415                            token: span.value,
416                            message: "expected a ')', none found".to_string(),
417                        })
418                    }
419                    None => {
420                        return Err(ParseError::<'a> {
421                            index: 0,
422                            token: "",
423                            message: "expected a ')', EOF found".to_string(),
424                        })
425                    }
426                };
427                Ok(Psuedo::<'a> {
428                    is_class_type,
429                    name: ident,
430                    arg: Some(arg),
431                })
432            }
433            Some((span, _token)) => {
434                return Err(ParseError::<'a> {
435                    index: span.start,
436                    token: span.value,
437                    message: "expected an ident or function, none found".to_string(),
438                })
439            }
440            None => {
441                return Err(ParseError::<'a> {
442                    index: 0,
443                    token: "",
444                    message: "expected an ident or function, EOF found".to_string(),
445                })
446            }
447        }
448    }
449}
450
451impl<'a> Parse<'a, Expression<'a>> for Parser {
452    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<Expression<'a>, ParseError<'a>> {
453        let mut items = vec![];
454        loop {
455            let item = Parser::parse(tokens);
456            if item.is_err() {
457                break;
458            }
459            while let Some((_, Token::S)) = tokens.get(0) {
460                *tokens = &tokens[1..];
461            }
462            items.push(item.ok().unwrap());
463        }
464        Ok(Expression::<'a> { items })
465    }
466}
467
468impl<'a> Parse<'a, ExpressionItem<'a>> for Parser {
469    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<ExpressionItem<'a>, ParseError<'a>> {
470        match tokens.get(0) {
471            Some((_, Token::PLUS)) => {
472                *tokens = &tokens[1..];
473                Ok(ExpressionItem::<'a>::Plus)
474            }
475            Some((_, Token::SUB)) => {
476                *tokens = &tokens[1..];
477                Ok(ExpressionItem::<'a>::Minus)
478            }
479            Some((_, Token::DIMENSION(dimension, unit))) => {
480                *tokens = &tokens[1..];
481                Ok(ExpressionItem::<'a>::Dimension(dimension, unit))
482            }
483            Some((_, Token::NUM(number))) => {
484                *tokens = &tokens[1..];
485                Ok(ExpressionItem::<'a>::Number(number))
486            }
487            Some((_, Token::STRING(string))) => {
488                *tokens = &tokens[1..];
489                Ok(ExpressionItem::<'a>::Str(string))
490            }
491            Some((_, Token::IDENT(ident))) => {
492                *tokens = &tokens[1..];
493                Ok(ExpressionItem::<'a>::Ident(ident))
494            }
495            Some((span, _token)) => {
496                return Err(ParseError::<'a> {
497                    index: span.start,
498                    token: span.value,
499                    message: "expected a '+', '-', dimension, number, string, or ident, none found"
500                        .to_string(),
501                })
502            }
503            None => {
504                return Err(ParseError::<'a> {
505                    index: 0,
506                    token: "",
507                    message: "expected a '+', '-', dimension, number, string, or ident, EOF found"
508                        .to_string(),
509                })
510            }
511        }
512    }
513}
514
515impl<'a> Parse<'a, Negation<'a>> for Parser {
516    fn parse(tokens: &mut &[TokenSpan<'a>]) -> Result<Negation<'a>, ParseError<'a>> {
517        match tokens.get(0) {
518            Some((_, Token::NOT)) => {
519                *tokens = &tokens[1..];
520            }
521            Some((span, _token)) => {
522                return Err(ParseError::<'a> {
523                    index: span.start,
524                    token: span.value,
525                    message: "expected a ':not(', none found".to_string(),
526                })
527            }
528            None => {
529                return Err(ParseError::<'a> {
530                    index: 0,
531                    token: "",
532                    message: "expected a ':not(', EOF found".to_string(),
533                })
534            }
535        }
536        while let Some((_, Token::S)) = tokens.get(0) {
537            *tokens = &tokens[1..];
538        }
539        let result = match tokens.get(0) {
540            Some((_, Token::IDENT(_))) | Some((_, Token::STAR)) => {
541                Ok(Negation::TypeSelector(Parser::parse(tokens)?))
542            }
543            Some((_, Token::HASH(hash))) => {
544                *tokens = &tokens[1..];
545                Ok(Negation::Hash(hash))
546            }
547            Some((_, Token::DOT(class))) => {
548                *tokens = &tokens[1..];
549                Ok(Negation::Class(class))
550            }
551            Some((_, Token::LBRACK)) => Ok(Negation::Attribute(Parser::parse(tokens)?)),
552            Some((_, Token::COLON)) => Ok(Negation::Psuedo(Parser::parse(tokens)?)),
553            Some((span, _token)) => Err(ParseError::<'a> {
554                index: span.start,
555                token: span.value,
556                message: "expected a '.', '#', '[', ident, '*', or ':', none found".to_string(),
557            }),
558            None => Err(ParseError::<'a> {
559                index: 0,
560                token: "",
561                message: "expected a '.', '#', '[', ident, '*', or ':', EOF found".to_string(),
562            }),
563        }?;
564        while let Some((_, Token::S)) = tokens.get(0) {
565            *tokens = &tokens[1..];
566        }
567        match tokens.get(0) {
568            Some((_, Token::RPAREN)) => {
569                *tokens = &tokens[1..];
570            }
571            Some((span, _token)) => {
572                return Err(ParseError::<'a> {
573                    index: span.start,
574                    token: span.value,
575                    message: "expected a ')', none found".to_string(),
576                })
577            }
578            None => {
579                return Err(ParseError::<'a> {
580                    index: 0,
581                    token: "",
582                    message: "expected a ')', EOF found".to_string(),
583                })
584            }
585        }
586        Ok(result)
587    }
588}
589
#[cfg(test)]
mod test {
    use super::*;
    use crate::token::Lexer;
    use crate::PASS_SELECTORS;

    /// Every entry of `PASS_SELECTORS` must lex and then parse without error.
    ///
    /// Lexing and parsing failures are reported with distinct messages so that a
    /// failing selector can be attributed to the right stage.
    #[test]
    fn pass_tests() {
        for test in PASS_SELECTORS.iter() {
            let lexed = match Lexer::parse(test) {
                Ok(lexed) => lexed,
                Err(e) => panic!("failed to lex {}: {:?}", test, e),
            };
            if let Err(e) = parse(&lexed.tokens[..]) {
                panic!("failed to parse {}: {:?}", test, e);
            }
        }
    }
}