// html_languageservice/parser/html_parse.rs
use crate::{
2 language_facts::data_manager::HTMLDataManager,
3 parser::html_scanner::{Scanner, TokenType},
4};
5use lsp_textdocument::FullTextDocument;
6
7use super::{
8 html_document::{HTMLDocument, Node, NodeAttribute},
9 html_scanner::ScannerState,
10};
11
12pub struct HTMLParser;
13
14impl HTMLParser {
15 pub fn parse_document(
16 document: &FullTextDocument,
17 data_manager: &HTMLDataManager,
18 ) -> HTMLDocument {
19 HTMLParser::parse(
20 document.get_content(None),
21 &document.language_id(),
22 data_manager,
23 )
24 }
25
26 pub fn parse(text: &str, language_id: &str, data_manager: &HTMLDataManager) -> HTMLDocument {
27 parse_html_document(text, language_id, &data_manager)
28 }
29}
30
31pub fn parse_html_document(
32 text: &str,
33 language_id: &str,
34 data_manager: &HTMLDataManager,
35) -> HTMLDocument {
36 let void_elements = data_manager.get_void_elements(language_id);
37 let mut scanner = Scanner::new(text, 0, ScannerState::WithinContent, true);
38
39 let mut html_document = Node::new(0, scanner.get_source_len(), vec![]);
40 let mut cur = &mut html_document as *mut Node;
41 let mut parent_list: Vec<*mut Node> = vec![];
42 let mut end_tag_start = None;
43 let mut end_tag_name = None;
44 let mut pending_attribute = None;
45 let mut token = scanner.scan();
46 unsafe {
47 while token != TokenType::EOS {
48 match token {
49 TokenType::StartTagOpen => {
50 let child =
51 Node::new(scanner.get_token_offset(), scanner.get_source_len(), vec![]);
52 let length = (*cur).children.len();
53 (*cur).children.push(child);
54 parent_list.push(cur);
55 cur = &mut (*cur).children[length];
56 }
57 TokenType::StartTag => {
58 (*cur).tag = Some(scanner.get_token_text().to_string());
59 }
60 TokenType::StartTagClose => {
61 if !parent_list.is_empty() {
62 (*cur).end = scanner.get_token_end();
63 if scanner.get_token_length() > 0 {
64 let tag = (*cur).tag.clone();
65 (*cur).start_tag_end = Some(scanner.get_token_end());
66 if tag.is_some()
67 && data_manager.is_void_element(&tag.unwrap(), &void_elements)
68 {
69 (*cur).closed = true;
70 cur = parent_list.pop().unwrap();
71 }
72 } else {
73 cur = parent_list.pop().unwrap();
75 }
76 }
77 }
78 TokenType::StartTagSelfClose => {
79 if !parent_list.is_empty() {
80 (*cur).closed = true;
81 (*cur).end = scanner.get_token_end();
82 (*cur).start_tag_end = Some(scanner.get_token_end());
83 cur = parent_list.pop().unwrap();
84 }
85 }
86 TokenType::EndTagOpen => {
87 end_tag_start = Some(scanner.get_token_offset());
88 end_tag_name = None;
89 }
90 TokenType::EndTag => {
91 end_tag_name = Some(scanner.get_token_text().to_string().to_lowercase());
92 }
93 TokenType::EndTagClose => {
94 let mut node = cur;
95 let mut node_parent_list_length = parent_list.len();
96 let end_tag_name = end_tag_name.as_deref();
97 while !(*node).is_same_tag(end_tag_name) && node_parent_list_length > 0 {
99 node_parent_list_length -= 1;
100 node = parent_list[node_parent_list_length];
101 }
102 if node_parent_list_length > 0 {
103 while node_parent_list_length != parent_list.len() {
104 (*cur).end = end_tag_start.unwrap();
105 (*cur).closed = false;
106 cur = parent_list.pop().unwrap();
107 }
108 (*cur).closed = true;
109 (*cur).end_tag_start = end_tag_start;
110 (*cur).end = scanner.get_token_end();
111 cur = parent_list.pop().unwrap();
112 }
113 }
114 TokenType::AttributeName => {
115 let text = scanner.get_token_text();
116 pending_attribute = Some(text.to_string());
117 (*cur).attributes.insert(
118 text.to_string(),
119 NodeAttribute::new(None, scanner.get_token_offset()),
120 ); }
122 TokenType::AttributeValue => {
123 let text = scanner.get_token_text();
124 if let Some(attr) = pending_attribute {
125 let offset = scanner.get_token_offset() - 1 - attr.len();
126 (*cur)
127 .attributes
128 .insert(attr, NodeAttribute::new(Some(text.to_string()), offset));
129 pending_attribute = None;
130 }
131 }
132 _ => {}
133 }
134 token = scanner.scan();
135 }
136 while !parent_list.is_empty() {
137 (*cur).end = scanner.get_source_len();
138 (*cur).closed = false;
139 cur = parent_list.pop().unwrap();
140 }
141 }
142 let mut roots = vec![];
143 for root in html_document.children {
144 roots.push(root);
145 }
146 HTMLDocument { roots }
147}