html_languageservice/parser/
html_parse.rs

1use crate::{
2    language_facts::data_manager::HTMLDataManager,
3    parser::html_scanner::{Scanner, TokenType},
4};
5use lsp_textdocument::FullTextDocument;
6
7use super::{
8    html_document::{HTMLDocument, Node, NodeAttribute},
9    html_scanner::ScannerState,
10};
11
12pub struct HTMLParser;
13
14impl HTMLParser {
15    pub fn parse_document(
16        document: &FullTextDocument,
17        data_manager: &HTMLDataManager,
18        case_sensitive: bool,
19    ) -> HTMLDocument {
20        HTMLParser::parse(
21            document.get_content(None),
22            &document.language_id(),
23            data_manager,
24            case_sensitive,
25        )
26    }
27
28    pub fn parse(
29        text: &str,
30        language_id: &str,
31        data_manager: &HTMLDataManager,
32        case_sensitive: bool,
33    ) -> HTMLDocument {
34        parse_html_document(text, language_id, &data_manager, case_sensitive)
35    }
36}
37
38pub fn parse_html_document(
39    text: &str,
40    language_id: &str,
41    data_manager: &HTMLDataManager,
42    case_sensitive: bool,
43) -> HTMLDocument {
44    let void_elements = data_manager.get_void_elements(language_id);
45    let mut scanner = Scanner::new(text, 0, ScannerState::WithinContent, true, case_sensitive);
46
47    let mut html_document = Node::new(0, scanner.get_source_len(), vec![]);
48    let mut cur = &mut html_document as *mut Node;
49    let mut parent_list: Vec<*mut Node> = vec![];
50    let mut end_tag_start = None;
51    let mut end_tag_name = None;
52    let mut pending_attribute = None;
53    let mut token = scanner.scan();
54    unsafe {
55        while token != TokenType::EOS {
56            match token {
57                TokenType::StartTagOpen => {
58                    let child =
59                        Node::new(scanner.get_token_offset(), scanner.get_source_len(), vec![]);
60                    let length = (*cur).children.len();
61                    (*cur).children.push(child);
62                    parent_list.push(cur);
63                    cur = &mut (*cur).children[length];
64                }
65                TokenType::StartTag => {
66                    (*cur).tag = Some(scanner.get_token_text().to_string());
67                }
68                TokenType::StartTagClose => {
69                    if !parent_list.is_empty() {
70                        (*cur).end = scanner.get_token_end();
71                        if scanner.get_token_length() > 0 {
72                            let tag = (*cur).tag.clone();
73                            (*cur).start_tag_end = Some(scanner.get_token_end());
74                            if tag.is_some()
75                                && data_manager.is_void_element(&tag.unwrap(), &void_elements)
76                            {
77                                (*cur).closed = true;
78                                cur = parent_list.pop().unwrap();
79                            }
80                        } else {
81                            // pseudo close token from an incomplete start tag
82                            cur = parent_list.pop().unwrap();
83                        }
84                    }
85                }
86                TokenType::StartTagSelfClose => {
87                    if !parent_list.is_empty() {
88                        (*cur).closed = true;
89                        (*cur).end = scanner.get_token_end();
90                        (*cur).start_tag_end = Some(scanner.get_token_end());
91                        cur = parent_list.pop().unwrap();
92                    }
93                }
94                TokenType::EndTagOpen => {
95                    end_tag_start = Some(scanner.get_token_offset());
96                    end_tag_name = None;
97                }
98                TokenType::EndTag => {
99                    if case_sensitive {
100                        end_tag_name = Some(scanner.get_token_text().to_string());
101                    } else {
102                        end_tag_name = Some(scanner.get_token_text().to_lowercase());
103                    }
104                }
105                TokenType::EndTagClose => {
106                    let mut node = cur;
107                    let mut node_parent_list_length = parent_list.len();
108                    let end_tag_name = end_tag_name.as_deref();
109                    // see if we can find a matching tag
110                    while !(*node).is_same_tag(end_tag_name, case_sensitive)
111                        && node_parent_list_length > 0
112                    {
113                        node_parent_list_length -= 1;
114                        node = parent_list[node_parent_list_length];
115                    }
116                    if node_parent_list_length > 0 {
117                        while node_parent_list_length != parent_list.len() {
118                            (*cur).end = end_tag_start.unwrap();
119                            (*cur).closed = false;
120                            cur = parent_list.pop().unwrap();
121                        }
122                        (*cur).closed = true;
123                        (*cur).end_tag_start = end_tag_start;
124                        (*cur).end = scanner.get_token_end();
125                        cur = parent_list.pop().unwrap();
126                    }
127                }
128                TokenType::AttributeName => {
129                    let text = scanner.get_token_text();
130                    pending_attribute = Some(text.to_string());
131                    (*cur).attributes.insert(
132                        text.to_string(),
133                        NodeAttribute::new(None, scanner.get_token_offset()),
134                    ); // Support valueless attributes such as 'checked'
135                }
136                TokenType::DelimiterAssign => {
137                    if let Some(attr) = &pending_attribute {
138                        let value = (*cur).attributes.get_mut(attr).unwrap();
139                        value.value = Some("".to_string());
140                    }
141                }
142                TokenType::AttributeValue => {
143                    let text = scanner.get_token_text();
144                    if let Some(attr) = pending_attribute {
145                        let value = (*cur).attributes.get_mut(&attr).unwrap();
146                        value.value = Some(text.to_string());
147                        pending_attribute = None;
148                    }
149                }
150                _ => {}
151            }
152            token = scanner.scan();
153        }
154        while !parent_list.is_empty() {
155            (*cur).end = scanner.get_source_len();
156            (*cur).closed = false;
157            cur = parent_list.pop().unwrap();
158        }
159    }
160    let mut roots = vec![];
161    for root in html_document.children {
162        roots.push(root);
163    }
164    HTMLDocument { roots }
165}