Skip to main content

normalize_languages/
html.rs

//! HTML language support: a minimal, parse-only skeleton that additionally
//! exposes the contents of `<script>` and `<style>` elements as embedded blocks.
2
3use crate::external_packages::ResolvedPackage;
4use crate::{Export, Import, Language, Symbol, Visibility, VisibilityMechanism};
5use std::path::{Path, PathBuf};
6use tree_sitter::Node;
7
/// Unit type that carries the [`Language`] implementation for HTML documents.
///
/// It holds no state; all behavior lives in the `impl Language for Html` block.
pub struct Html;
10
11impl Language for Html {
12    fn name(&self) -> &'static str {
13        "HTML"
14    }
15    fn extensions(&self) -> &'static [&'static str] {
16        &["html", "htm"]
17    }
18    fn grammar_name(&self) -> &'static str {
19        "html"
20    }
21
22    fn has_symbols(&self) -> bool {
23        false
24    }
25
26    // HTML has no functions/containers/types in the traditional sense
27    fn container_kinds(&self) -> &'static [&'static str] {
28        &[]
29    }
30    fn function_kinds(&self) -> &'static [&'static str] {
31        &[]
32    }
33    fn type_kinds(&self) -> &'static [&'static str] {
34        &[]
35    }
36    fn import_kinds(&self) -> &'static [&'static str] {
37        &[]
38    }
39    fn public_symbol_kinds(&self) -> &'static [&'static str] {
40        &[]
41    }
42    fn visibility_mechanism(&self) -> VisibilityMechanism {
43        VisibilityMechanism::NotApplicable
44    }
45    fn scope_creating_kinds(&self) -> &'static [&'static str] {
46        &[]
47    }
48    fn control_flow_kinds(&self) -> &'static [&'static str] {
49        &[]
50    }
51    fn complexity_nodes(&self) -> &'static [&'static str] {
52        &[]
53    }
54    fn nesting_nodes(&self) -> &'static [&'static str] {
55        &[]
56    }
57
58    fn signature_suffix(&self) -> &'static str {
59        ""
60    }
61
62    fn extract_function(
63        &self,
64        _node: &Node,
65        _content: &str,
66        _in_container: bool,
67    ) -> Option<Symbol> {
68        None
69    }
70
71    fn extract_container(&self, _node: &Node, _content: &str) -> Option<Symbol> {
72        None
73    }
74
75    fn extract_type(&self, _node: &Node, _content: &str) -> Option<Symbol> {
76        None
77    }
78    fn extract_docstring(&self, _node: &Node, _content: &str) -> Option<String> {
79        None
80    }
81
82    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
83        Vec::new()
84    }
85    fn extract_imports(&self, _node: &Node, _content: &str) -> Vec<Import> {
86        Vec::new()
87    }
88
89    fn format_import(&self, _import: &Import, _names: Option<&[&str]>) -> String {
90        // HTML has no imports
91        String::new()
92    }
93    fn extract_public_symbols(&self, _node: &Node, _content: &str) -> Vec<Export> {
94        Vec::new()
95    }
96
97    fn is_public(&self, _node: &Node, _content: &str) -> bool {
98        true
99    }
100    fn get_visibility(&self, _node: &Node, _content: &str) -> Visibility {
101        Visibility::Public
102    }
103
104    fn is_test_symbol(&self, _symbol: &crate::Symbol) -> bool {
105        false
106    }
107
108    fn embedded_content(&self, node: &Node, content: &str) -> Option<crate::EmbeddedBlock> {
109        match node.kind() {
110            "script_element" => {
111                let raw = find_raw_text_child(node)?;
112                let grammar = detect_script_type(node, content);
113                Some(crate::EmbeddedBlock {
114                    grammar,
115                    content: content[raw.byte_range()].to_string(),
116                    start_line: raw.start_position().row + 1,
117                })
118            }
119            "style_element" => {
120                let raw = find_raw_text_child(node)?;
121                Some(crate::EmbeddedBlock {
122                    grammar: "css",
123                    content: content[raw.byte_range()].to_string(),
124                    start_line: raw.start_position().row + 1,
125                })
126            }
127            _ => None,
128        }
129    }
130
131    fn container_body<'a>(&self, _node: &'a Node<'a>) -> Option<Node<'a>> {
132        None
133    }
134    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
135        false
136    }
137    fn node_name<'a>(&self, _node: &Node, _content: &'a str) -> Option<&'a str> {
138        None
139    }
140
141    fn file_path_to_module_name(&self, _: &Path) -> Option<String> {
142        None
143    }
144    fn module_name_to_paths(&self, _: &str) -> Vec<String> {
145        Vec::new()
146    }
147
148    fn lang_key(&self) -> &'static str {
149        ""
150    }
151    fn resolve_local_import(&self, _: &str, _: &Path, _: &Path) -> Option<PathBuf> {
152        None
153    }
154    fn resolve_external_import(&self, _: &str, _: &Path) -> Option<ResolvedPackage> {
155        None
156    }
157    fn is_stdlib_import(&self, _: &str, _: &Path) -> bool {
158        false
159    }
160    fn get_version(&self, _: &Path) -> Option<String> {
161        None
162    }
163    fn find_package_cache(&self, _: &Path) -> Option<PathBuf> {
164        None
165    }
166    fn indexable_extensions(&self) -> &'static [&'static str] {
167        &[]
168    }
169    fn find_stdlib(&self, _: &Path) -> Option<PathBuf> {
170        None
171    }
172    fn package_module_name(&self, name: &str) -> String {
173        name.to_string()
174    }
175    fn package_sources(&self, _: &Path) -> Vec<crate::PackageSource> {
176        Vec::new()
177    }
178    fn discover_packages(&self, _: &crate::PackageSource) -> Vec<(String, PathBuf)> {
179        Vec::new()
180    }
181    fn find_package_entry(&self, _: &Path) -> Option<PathBuf> {
182        None
183    }
184
185    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool {
186        use crate::traits::{has_extension, skip_dotfiles};
187        if skip_dotfiles(name) {
188            return true;
189        }
190        !is_dir && !has_extension(name, self.indexable_extensions())
191    }
192}
193
194/// Find the raw_text child of a script/style element.
195fn find_raw_text_child<'a>(node: &'a Node<'a>) -> Option<Node<'a>> {
196    let mut cursor = node.walk();
197    for child in node.children(&mut cursor) {
198        if child.kind() == "raw_text" {
199            return Some(child);
200        }
201    }
202    None
203}
204
205/// Detect script type from the type attribute (e.g., <script type="module">).
206/// HTML scripts default to JavaScript; type="module" is still JavaScript.
207fn detect_script_type(node: &Node, content: &str) -> &'static str {
208    if let Some(script_type) = get_type_attribute(node, content) {
209        match script_type {
210            "text/typescript" => return "typescript",
211            "module" | "text/javascript" | "application/javascript" => return "javascript",
212            _ => {}
213        }
214    }
215    "javascript"
216}
217
218/// Get the type attribute value from a script element.
219fn get_type_attribute<'a>(node: &Node, content: &'a str) -> Option<&'a str> {
220    let mut cursor = node.walk();
221    for child in node.children(&mut cursor) {
222        // Look for start_tag which contains the attributes
223        if child.kind() == "start_tag" {
224            let mut inner_cursor = child.walk();
225            for attr in child.children(&mut inner_cursor) {
226                if attr.kind() == "attribute" {
227                    // Check if this is a type attribute
228                    let mut attr_cursor = attr.walk();
229                    let mut is_type = false;
230                    for part in attr.children(&mut attr_cursor) {
231                        if part.kind() == "attribute_name" {
232                            let name = &content[part.byte_range()];
233                            is_type = name == "type";
234                        } else if is_type && part.kind() == "quoted_attribute_value" {
235                            // Get the value inside quotes
236                            let value = &content[part.byte_range()];
237                            return Some(value.trim_matches('"').trim_matches('\''));
238                        }
239                    }
240                }
241            }
242        }
243    }
244    None
245}
246
#[cfg(test)]
mod tests {
    use super::Html;
    use crate::validate_unused_kinds_audit;

    /// Runs the shared unused-node-kinds audit for HTML, passing the kinds
    /// this file lists as documented-unused.
    #[test]
    fn unused_node_kinds_audit() {
        // One kind per line so additions produce minimal diffs.
        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            "doctype",
        ];

        let audit = validate_unused_kinds_audit(&Html, documented_unused);
        audit.expect("HTML unused node kinds audit failed");
    }
}