Skip to main content

oak_html/parser/
mod.rs

1pub mod element_type;
2
3use crate::{
4    language::HtmlLanguage,
5    lexer::{HtmlLexer, token_type::HtmlTokenType},
6    parser::element_type::HtmlElementType,
7};
8use oak_core::{
9    GreenNode, OakError,
10    parser::{ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer},
11    source::{Source, TextEdit},
12};
13
/// Shorthand for the core [`ParserState`] specialized to [`HtmlLanguage`],
/// generic over the source type `S`.
pub(crate) type State<'a, S> = ParserState<'a, HtmlLanguage, S>;
15
/// Parser for the HTML language.
///
/// This parser transforms a stream of tokens into a green tree of HTML syntax nodes.
/// The tokens come from an [`HtmlLexer`] that is constructed from the stored
/// configuration whenever [`Parser::parse`] is invoked.
pub struct HtmlParser {
    /// Language configuration handed to [`HtmlParser::new`]; it is passed by
    /// reference to `HtmlLexer::new` when parsing.
    pub(crate) _config: HtmlLanguage,
}
22
23impl HtmlParser {
24    /// Creates a new `HtmlParser` with the given configuration.
25    pub fn new(config: HtmlLanguage) -> Self {
26        Self { _config: config }
27    }
28
29    /// The internal entry point for parsing the root of an HTML document.
30    pub(crate) fn parse_root_internal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<&'a GreenNode<'a, HtmlLanguage>, OakError> {
31        let checkpoint = state.checkpoint();
32
33        while state.not_at_end() {
34            match state.peek_kind() {
35                Some(HtmlTokenType::TagOpen) => self.parse_tag(state)?,
36                Some(HtmlTokenType::Doctype) => {
37                    state.bump();
38                }
39                Some(HtmlTokenType::Comment) => {
40                    state.bump();
41                }
42                _ => {
43                    state.bump();
44                }
45            }
46        }
47
48        Ok(state.finish_at(checkpoint, crate::parser::element_type::HtmlElementType::Document))
49    }
50
51    /// Parses an HTML tag, including its attributes and potentially its children.
52    ///
53    /// This method handles both self-closing tags (e.g., `<br/>`) and tags with
54    /// separate closing tags (e.g., `<div>...</div>`).
55    fn parse_tag<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
56        use crate::lexer::token_type::HtmlTokenType::*;
57        let cp = state.checkpoint();
58        state.expect(TagOpen).ok();
59        state.expect(TagName).ok();
60
61        while state.not_at_end() && !matches!(state.peek_kind(), Some(TagClose) | Some(TagSelfClose)) {
62            if state.at(AttributeName) {
63                let attr_cp = state.checkpoint();
64                state.bump(); // AttributeName
65                if state.eat(Equal) {
66                    state.eat(Quote);
67                    state.eat(AttributeValue);
68                    state.eat(Quote);
69                }
70                state.finish_at(attr_cp, HtmlElementType::Attribute);
71            }
72            else {
73                state.advance();
74            }
75        }
76
77        if state.eat(TagSelfClose) {
78            // Self-closing tag
79        }
80        else if state.eat(TagClose) {
81            // Recurse to parse children until the matching closing tag is found
82            // Simplified handling: skip until closing tag
83            while state.not_at_end() && !state.at(TagSlashOpen) {
84                if state.at(TagOpen) {
85                    self.parse_tag(state)?
86                }
87                else {
88                    state.advance();
89                }
90            }
91            if state.eat(TagSlashOpen) {
92                state.eat(TagName);
93                state.expect(TagClose).ok();
94            }
95        }
96
97        state.finish_at(cp, HtmlElementType::Element);
98        Ok(())
99    }
100}
101
impl Parser<HtmlLanguage> for HtmlParser {
    /// Parses `text` as HTML.
    ///
    /// Builds an [`HtmlLexer`] from this parser's configuration and delegates to
    /// `parse_with_lexer`, forwarding `edits` and `cache` unchanged and using
    /// `parse_root_internal` as the callback that parses the document root.
    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<HtmlLanguage>) -> ParseOutput<'a, HtmlLanguage> {
        let lexer = HtmlLexer::new(&self._config);
        parse_with_lexer(&lexer, text, edits, cache, |state| self.parse_root_internal(state))
    }
}
107}