html_languageservice/parser/
html_parse.rs1use crate::{
2 language_facts::data_manager::HTMLDataManager,
3 parser::html_scanner::{Scanner, TokenType},
4};
5use lsp_textdocument::FullTextDocument;
6
7use super::{
8 html_document::{HTMLDocument, Node, NodeAttribute},
9 html_scanner::ScannerState,
10};
11
12pub struct HTMLParser;
13
14impl HTMLParser {
15 pub fn parse_document(
16 document: &FullTextDocument,
17 data_manager: &HTMLDataManager,
18 case_sensitive: bool,
19 ) -> HTMLDocument {
20 HTMLParser::parse(
21 document.get_content(None),
22 &document.language_id(),
23 data_manager,
24 case_sensitive,
25 )
26 }
27
28 pub fn parse(
29 text: &str,
30 language_id: &str,
31 data_manager: &HTMLDataManager,
32 case_sensitive: bool,
33 ) -> HTMLDocument {
34 parse_html_document(text, language_id, &data_manager, case_sensitive)
35 }
36}
37
38pub fn parse_html_document(
39 text: &str,
40 language_id: &str,
41 data_manager: &HTMLDataManager,
42 case_sensitive: bool,
43) -> HTMLDocument {
44 let void_elements = data_manager.get_void_elements(language_id);
45 let mut scanner = Scanner::new(text, 0, ScannerState::WithinContent, true, case_sensitive);
46
47 let mut html_document = Node::new(0, scanner.get_source_len(), vec![]);
48 let mut cur = &mut html_document as *mut Node;
49 let mut parent_list: Vec<*mut Node> = vec![];
50 let mut end_tag_start = None;
51 let mut end_tag_name = None;
52 let mut pending_attribute = None;
53 let mut token = scanner.scan();
54 unsafe {
55 while token != TokenType::EOS {
56 match token {
57 TokenType::StartTagOpen => {
58 let child =
59 Node::new(scanner.get_token_offset(), scanner.get_source_len(), vec![]);
60 let length = (*cur).children.len();
61 (*cur).children.push(child);
62 parent_list.push(cur);
63 cur = &mut (*cur).children[length];
64 }
65 TokenType::StartTag => {
66 (*cur).tag = Some(scanner.get_token_text().to_string());
67 }
68 TokenType::StartTagClose => {
69 if !parent_list.is_empty() {
70 (*cur).end = scanner.get_token_end();
71 if scanner.get_token_length() > 0 {
72 let tag = (*cur).tag.clone();
73 (*cur).start_tag_end = Some(scanner.get_token_end());
74 if tag.is_some()
75 && data_manager.is_void_element(&tag.unwrap(), &void_elements)
76 {
77 (*cur).closed = true;
78 cur = parent_list.pop().unwrap();
79 }
80 } else {
81 cur = parent_list.pop().unwrap();
83 }
84 }
85 }
86 TokenType::StartTagSelfClose => {
87 if !parent_list.is_empty() {
88 (*cur).closed = true;
89 (*cur).end = scanner.get_token_end();
90 (*cur).start_tag_end = Some(scanner.get_token_end());
91 cur = parent_list.pop().unwrap();
92 }
93 }
94 TokenType::EndTagOpen => {
95 end_tag_start = Some(scanner.get_token_offset());
96 end_tag_name = None;
97 }
98 TokenType::EndTag => {
99 if case_sensitive {
100 end_tag_name = Some(scanner.get_token_text().to_string());
101 } else {
102 end_tag_name = Some(scanner.get_token_text().to_lowercase());
103 }
104 }
105 TokenType::EndTagClose => {
106 let mut node = cur;
107 let mut node_parent_list_length = parent_list.len();
108 let end_tag_name = end_tag_name.as_deref();
109 while !(*node).is_same_tag(end_tag_name, case_sensitive)
111 && node_parent_list_length > 0
112 {
113 node_parent_list_length -= 1;
114 node = parent_list[node_parent_list_length];
115 }
116 if node_parent_list_length > 0 {
117 while node_parent_list_length != parent_list.len() {
118 (*cur).end = end_tag_start.unwrap();
119 (*cur).closed = false;
120 cur = parent_list.pop().unwrap();
121 }
122 (*cur).closed = true;
123 (*cur).end_tag_start = end_tag_start;
124 (*cur).end = scanner.get_token_end();
125 cur = parent_list.pop().unwrap();
126 }
127 }
128 TokenType::AttributeName => {
129 let text = scanner.get_token_text();
130 pending_attribute = Some(text.to_string());
131 (*cur).attributes.insert(
132 text.to_string(),
133 NodeAttribute::new(None, scanner.get_token_offset()),
134 ); }
136 TokenType::DelimiterAssign => {
137 if let Some(attr) = &pending_attribute {
138 let value = (*cur).attributes.get_mut(attr).unwrap();
139 value.value = Some("".to_string());
140 }
141 }
142 TokenType::AttributeValue => {
143 let text = scanner.get_token_text();
144 if let Some(attr) = pending_attribute {
145 let value = (*cur).attributes.get_mut(&attr).unwrap();
146 value.value = Some(text.to_string());
147 pending_attribute = None;
148 }
149 }
150 _ => {}
151 }
152 token = scanner.scan();
153 }
154 while !parent_list.is_empty() {
155 (*cur).end = scanner.get_source_len();
156 (*cur).closed = false;
157 cur = parent_list.pop().unwrap();
158 }
159 }
160 let mut roots = vec![];
161 for root in html_document.children {
162 roots.push(root);
163 }
164 HTMLDocument { roots }
165}