html_languageservice/parser/
html_parse.rs

1use crate::{
2    language_facts::data_manager::HTMLDataManager,
3    parser::html_scanner::{Scanner, TokenType},
4};
5use lsp_textdocument::FullTextDocument;
6
7use super::{
8    html_document::{HTMLDocument, Node, NodeAttribute},
9    html_scanner::ScannerState,
10};
11
12pub struct HTMLParser;
13
14impl HTMLParser {
15    pub fn parse_document(
16        document: &FullTextDocument,
17        data_manager: &HTMLDataManager,
18    ) -> HTMLDocument {
19        HTMLParser::parse(
20            document.get_content(None),
21            &document.language_id(),
22            data_manager,
23        )
24    }
25
26    pub fn parse(text: &str, language_id: &str, data_manager: &HTMLDataManager) -> HTMLDocument {
27        parse_html_document(text, language_id, &data_manager)
28    }
29}
30
31pub fn parse_html_document(
32    text: &str,
33    language_id: &str,
34    data_manager: &HTMLDataManager,
35) -> HTMLDocument {
36    let void_elements = data_manager.get_void_elements(language_id);
37    let mut scanner = Scanner::new(text, 0, ScannerState::WithinContent, true);
38
39    let mut html_document = Node::new(0, scanner.get_source_len(), vec![]);
40    let mut cur = &mut html_document as *mut Node;
41    let mut parent_list: Vec<*mut Node> = vec![];
42    let mut end_tag_start = None;
43    let mut end_tag_name = None;
44    let mut pending_attribute = None;
45    let mut token = scanner.scan();
46    unsafe {
47        while token != TokenType::EOS {
48            match token {
49                TokenType::StartTagOpen => {
50                    let child =
51                        Node::new(scanner.get_token_offset(), scanner.get_source_len(), vec![]);
52                    let length = (*cur).children.len();
53                    (*cur).children.push(child);
54                    parent_list.push(cur);
55                    cur = &mut (*cur).children[length];
56                }
57                TokenType::StartTag => {
58                    (*cur).tag = Some(scanner.get_token_text().to_string());
59                }
60                TokenType::StartTagClose => {
61                    if !parent_list.is_empty() {
62                        (*cur).end = scanner.get_token_end();
63                        if scanner.get_token_length() > 0 {
64                            let tag = (*cur).tag.clone();
65                            (*cur).start_tag_end = Some(scanner.get_token_end());
66                            if tag.is_some()
67                                && data_manager.is_void_element(&tag.unwrap(), &void_elements)
68                            {
69                                (*cur).closed = true;
70                                cur = parent_list.pop().unwrap();
71                            }
72                        } else {
73                            // pseudo close token from an incomplete start tag
74                            cur = parent_list.pop().unwrap();
75                        }
76                    }
77                }
78                TokenType::StartTagSelfClose => {
79                    if !parent_list.is_empty() {
80                        (*cur).closed = true;
81                        (*cur).end = scanner.get_token_end();
82                        (*cur).start_tag_end = Some(scanner.get_token_end());
83                        cur = parent_list.pop().unwrap();
84                    }
85                }
86                TokenType::EndTagOpen => {
87                    end_tag_start = Some(scanner.get_token_offset());
88                    end_tag_name = None;
89                }
90                TokenType::EndTag => {
91                    end_tag_name = Some(scanner.get_token_text().to_string().to_lowercase());
92                }
93                TokenType::EndTagClose => {
94                    let mut node = cur;
95                    let mut node_parent_list_length = parent_list.len();
96                    let end_tag_name = end_tag_name.as_deref();
97                    // see if we can find a matching tag
98                    while !(*node).is_same_tag(end_tag_name) && node_parent_list_length > 0 {
99                        node_parent_list_length -= 1;
100                        node = parent_list[node_parent_list_length];
101                    }
102                    if node_parent_list_length > 0 {
103                        while node_parent_list_length != parent_list.len() {
104                            (*cur).end = end_tag_start.unwrap();
105                            (*cur).closed = false;
106                            cur = parent_list.pop().unwrap();
107                        }
108                        (*cur).closed = true;
109                        (*cur).end_tag_start = end_tag_start;
110                        (*cur).end = scanner.get_token_end();
111                        cur = parent_list.pop().unwrap();
112                    }
113                }
114                TokenType::AttributeName => {
115                    let text = scanner.get_token_text();
116                    pending_attribute = Some(text.to_string());
117                    (*cur).attributes.insert(
118                        text.to_string(),
119                        NodeAttribute::new(None, scanner.get_token_offset()),
120                    ); // Support valueless attributes such as 'checked'
121                }
122                TokenType::AttributeValue => {
123                    let text = scanner.get_token_text();
124                    if let Some(attr) = pending_attribute {
125                        let offset = scanner.get_token_offset() - 1 - attr.len();
126                        (*cur)
127                            .attributes
128                            .insert(attr, NodeAttribute::new(Some(text.to_string()), offset));
129                        pending_attribute = None;
130                    }
131                }
132                _ => {}
133            }
134            token = scanner.scan();
135        }
136        while !parent_list.is_empty() {
137            (*cur).end = scanner.get_source_len();
138            (*cur).closed = false;
139            cur = parent_list.pop().unwrap();
140        }
141    }
142    let mut roots = vec![];
143    for root in html_document.children {
144        roots.push(root);
145    }
146    HTMLDocument { roots }
147}