Skip to main content

oak_html/parser/
mod.rs

1/// Element type module for HTML.
2pub mod element_type;
3
4use crate::{
5    language::HtmlLanguage,
6    lexer::{HtmlLexer, token_type::HtmlTokenType},
7    parser::element_type::HtmlElementType,
8};
9use oak_core::{
10    GreenNode, OakError,
11    parser::{ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer},
12    source::{Source, TextEdit},
13};
14
15pub(crate) type State<'a, S> = ParserState<'a, HtmlLanguage, S>;
16
17/// Parser for the HTML language.
18///
19/// This parser transforms a stream of tokens into a green tree of HTML syntax nodes.
20pub struct HtmlParser {
21    pub(crate) config: HtmlLanguage,
22}
23
24impl HtmlParser {
25    /// Creates a new `HtmlParser` with the given configuration.
26    pub fn new(config: HtmlLanguage) -> Self {
27        Self { config }
28    }
29
30    /// Parses an HTML tag, including its attributes and potentially its children.
31    ///
32    /// This method handles both self-closing tags (e.g., `<br/>`) and tags with
33    /// separate closing tags (e.g., `<div>...</div>`).
34    fn parse_tag<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
35        use crate::lexer::token_type::HtmlTokenType::*;
36        let cp = state.checkpoint();
37        state.expect(TagOpen).ok();
38        state.expect(TagName).ok();
39
40        while state.not_at_end() && !matches!(state.peek_kind(), Some(TagClose) | Some(TagSelfClose)) {
41            if state.at(AttributeName) {
42                let attr_cp = state.checkpoint();
43                state.bump(); // AttributeName
44                if state.eat(Equal) {
45                    state.eat(Quote);
46                    state.eat(AttributeValue);
47                    state.eat(Quote);
48                }
49                state.finish_at(attr_cp, HtmlElementType::Attribute);
50            }
51            else {
52                state.advance();
53            }
54        }
55
56        if state.eat(TagSelfClose) {
57            // Self-closing tag
58        }
59        else if state.eat(TagClose) {
60            // Recurse to parse children until the matching closing tag is found
61            // Simplified handling: skip until closing tag
62            while state.not_at_end() && !state.at(TagSlashOpen) {
63                if state.at(TagOpen) {
64                    self.parse_tag(state)?
65                }
66                else {
67                    state.advance();
68                }
69            }
70            if state.eat(TagSlashOpen) {
71                state.eat(TagName);
72                state.expect(TagClose).ok();
73            }
74        }
75
76        state.finish_at(cp, HtmlElementType::Element);
77        Ok(())
78    }
79}
80
81impl Parser<HtmlLanguage> for HtmlParser {
82    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<HtmlLanguage>) -> ParseOutput<'a, HtmlLanguage> {
83        let lexer = HtmlLexer::new(&self.config);
84        parse_with_lexer(&lexer, text, edits, cache, |state| {
85            let checkpoint = state.checkpoint();
86
87            while state.not_at_end() {
88                match state.peek_kind() {
89                    Some(HtmlTokenType::TagOpen) => self.parse_tag(state)?,
90                    Some(HtmlTokenType::Doctype) => {
91                        state.bump();
92                    }
93                    Some(HtmlTokenType::Comment) => {
94                        state.bump();
95                    }
96                    _ => {
97                        state.bump();
98                    }
99                }
100            }
101
102            Ok(state.finish_at(checkpoint, crate::parser::element_type::HtmlElementType::Document))
103        })
104    }
105}