normalize_languages/
html.rs1use crate::{Language, LanguageEmbedded, LanguageSymbols};
7use tree_sitter::Node;
8
/// Unit type that carries the HTML implementations of [`Language`],
/// [`LanguageSymbols`] and [`LanguageEmbedded`].
pub struct Html;
11
12impl Language for Html {
13 fn name(&self) -> &'static str {
14 "HTML"
15 }
16 fn extensions(&self) -> &'static [&'static str] {
17 &["html", "htm"]
18 }
19 fn grammar_name(&self) -> &'static str {
20 "html"
21 }
22
23 fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
24 Some(self)
25 }
26
27 fn as_embedded(&self) -> Option<&dyn LanguageEmbedded> {
28 Some(self)
29 }
30
31 fn refine_kind(
32 &self,
33 node: &Node,
34 _content: &str,
35 tag_kind: crate::SymbolKind,
36 ) -> crate::SymbolKind {
37 if node.kind() == "element" && has_child_elements(node) {
38 return crate::SymbolKind::Module;
39 }
40 tag_kind
41 }
42
43 fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
44 if node.kind() == "element"
45 || node.kind() == "script_element"
46 || node.kind() == "style_element"
47 {
48 return extract_html_tag_name(node, content);
49 }
50 None
51 }
52
53 fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
54 if node.kind() == "element" && has_child_elements(node) {
57 return Some(*node);
58 }
59 None
60 }
61
62 fn build_signature(&self, node: &Node, content: &str) -> String {
63 if let Some(tag) = self.node_name(node, content) {
64 if let Some(attrs) = extract_key_attributes(node, content) {
66 return format!("<{} {}>", tag, attrs);
67 }
68 return format!("<{}>", tag);
69 }
70 content[node.byte_range()]
71 .lines()
72 .next()
73 .unwrap_or("")
74 .trim()
75 .to_string()
76 }
77}
78
// Nothing is overridden here, so `Html` relies entirely on whatever
// `LanguageSymbols` provides by default.
impl LanguageSymbols for Html {}
80
81impl LanguageEmbedded for Html {
82 fn embedded_content(&self, node: &Node, content: &str) -> Option<crate::EmbeddedBlock> {
83 match node.kind() {
84 "script_element" => {
85 let raw = find_raw_text_child(node)?;
86 let grammar = detect_script_type(node, content);
87 Some(crate::EmbeddedBlock {
88 grammar,
89 content: content[raw.byte_range()].to_string(),
90 start_line: raw.start_position().row + 1,
91 })
92 }
93 "style_element" => {
94 let raw = find_raw_text_child(node)?;
95 Some(crate::EmbeddedBlock {
96 grammar: "css",
97 content: content[raw.byte_range()].to_string(),
98 start_line: raw.start_position().row + 1,
99 })
100 }
101 _ => None,
102 }
103 }
104}
105
106fn has_child_elements(node: &Node) -> bool {
108 let mut cursor = node.walk();
109 node.children(&mut cursor).any(|child| {
110 child.kind() == "element"
111 || child.kind() == "script_element"
112 || child.kind() == "style_element"
113 })
114}
115
116fn extract_html_tag_name<'a>(node: &Node, content: &'a str) -> Option<&'a str> {
118 let mut cursor = node.walk();
119 for child in node.children(&mut cursor) {
120 if child.kind() == "start_tag" || child.kind() == "self_closing_tag" {
121 let mut inner = child.walk();
122 for part in child.children(&mut inner) {
123 if part.kind() == "tag_name" {
124 return Some(&content[part.byte_range()]);
125 }
126 }
127 }
128 }
129 None
130}
131
132fn extract_key_attributes(node: &Node, content: &str) -> Option<String> {
134 let mut cursor = node.walk();
135 for child in node.children(&mut cursor) {
136 if child.kind() == "start_tag" || child.kind() == "self_closing_tag" {
137 let mut parts = Vec::new();
138 let mut inner = child.walk();
139 for attr in child.children(&mut inner) {
140 if attr.kind() == "attribute" {
141 let mut attr_cursor = attr.walk();
142 let mut attr_name = None;
143 let mut attr_val = None;
144 for part in attr.children(&mut attr_cursor) {
145 if part.kind() == "attribute_name" {
146 attr_name = Some(&content[part.byte_range()]);
147 } else if part.kind() == "quoted_attribute_value" {
148 attr_val = Some(&content[part.byte_range()]);
149 }
150 }
151 if let (Some(name), Some(val)) = (attr_name, attr_val)
152 && (name == "id" || name == "class")
153 {
154 parts.push(format!("{}={}", name, val));
155 }
156 }
157 }
158 if !parts.is_empty() {
159 return Some(parts.join(" "));
160 }
161 }
162 }
163 None
164}
165
166fn find_raw_text_child<'a>(node: &'a Node<'a>) -> Option<Node<'a>> {
168 let mut cursor = node.walk();
169 node.children(&mut cursor)
170 .find(|&child| child.kind() == "raw_text")
171}
172
173fn detect_script_type(node: &Node, content: &str) -> &'static str {
176 if let Some(script_type) = get_type_attribute(node, content) {
177 match script_type {
178 "text/typescript" => return "typescript",
179 "module" | "text/javascript" | "application/javascript" => return "javascript",
180 _ => {}
181 }
182 }
183 "javascript"
184}
185
186fn get_type_attribute<'a>(node: &Node, content: &'a str) -> Option<&'a str> {
188 let mut cursor = node.walk();
189 for child in node.children(&mut cursor) {
190 if child.kind() == "start_tag" {
192 let mut inner_cursor = child.walk();
193 for attr in child.children(&mut inner_cursor) {
194 if attr.kind() == "attribute" {
195 let mut attr_cursor = attr.walk();
197 let mut is_type = false;
198 for part in attr.children(&mut attr_cursor) {
199 if part.kind() == "attribute_name" {
200 let name = &content[part.byte_range()];
201 is_type = name == "type";
202 } else if is_type && part.kind() == "quoted_attribute_value" {
203 let value = &content[part.byte_range()];
205 return Some(value.trim_matches('"').trim_matches('\''));
206 }
207 }
208 }
209 }
210 }
211 }
212 None
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Runs the crate's unused-node-kinds audit against the HTML language
    /// and fails the test if the audit returns an error.
    #[test]
    fn unused_node_kinds_audit() {
        // Node kinds passed to the audit as documented-unused; presumably
        // these are grammar kinds this file deliberately does not handle
        // (confirm against `validate_unused_kinds_audit`).
        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            "doctype",
        ];

        validate_unused_kinds_audit(&Html, documented_unused)
            .expect("HTML unused node kinds audit failed");
    }
}