Skip to main content

normalize_languages/
xml.rs

//! XML language support with symbol extraction.
//!
//! XML elements are extracted as symbols: elements with child elements become
//! Modules (containers), leaf elements become Variables. The tag name is the symbol name.
5
6use crate::{Language, LanguageSymbols};
7use tree_sitter::Node;
8
/// XML language support.
///
/// A zero-sized marker type: all behavior lives in its trait implementations.
/// The common traits are derived so the type can be printed in diagnostics,
/// copied freely, and constructed through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct Xml;
11
12impl Language for Xml {
13    fn name(&self) -> &'static str {
14        "XML"
15    }
16    fn extensions(&self) -> &'static [&'static str] {
17        &["xml", "xsl", "xslt", "xsd", "svg", "plist"]
18    }
19    fn grammar_name(&self) -> &'static str {
20        "xml"
21    }
22
23    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
24        Some(self)
25    }
26
27    fn refine_kind(
28        &self,
29        node: &Node,
30        _content: &str,
31        tag_kind: crate::SymbolKind,
32    ) -> crate::SymbolKind {
33        if node.kind() == "element" && has_child_elements(node) {
34            return crate::SymbolKind::Module;
35        }
36        tag_kind
37    }
38
39    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
40        if node.kind() == "element" {
41            return extract_xml_tag_name(node, content);
42        }
43        None
44    }
45
46    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
47        if node.kind() == "element" && has_child_elements(node) {
48            // Return the content node which contains child elements
49            let mut cursor = node.walk();
50            for child in node.children(&mut cursor) {
51                if child.kind() == "content" {
52                    return Some(child);
53                }
54            }
55        }
56        None
57    }
58
59    fn build_signature(&self, node: &Node, content: &str) -> String {
60        if let Some(tag) = self.node_name(node, content) {
61            if let Some(attrs) = extract_key_attributes(node, content) {
62                return format!("<{} {}>", tag, attrs);
63            }
64            return format!("<{}>", tag);
65        }
66        content[node.byte_range()]
67            .lines()
68            .next()
69            .unwrap_or("")
70            .trim()
71            .to_string()
72    }
73}
74
// The impl body is empty: `Xml` overrides nothing and uses whatever behavior
// the `LanguageSymbols` trait itself provides.
impl LanguageSymbols for Xml {}
76
77/// Check if an element has child elements in its content.
78fn has_child_elements(node: &Node) -> bool {
79    let mut cursor = node.walk();
80    for child in node.children(&mut cursor) {
81        if child.kind() == "content" {
82            let mut inner = child.walk();
83            for grandchild in child.children(&mut inner) {
84                if grandchild.kind() == "element" {
85                    return true;
86                }
87            }
88        }
89    }
90    false
91}
92
93/// Extract tag name from STag or EmptyElemTag.
94fn extract_xml_tag_name<'a>(node: &Node, content: &'a str) -> Option<&'a str> {
95    let mut cursor = node.walk();
96    for child in node.children(&mut cursor) {
97        if child.kind() == "STag" || child.kind() == "EmptyElemTag" {
98            let mut inner = child.walk();
99            for part in child.children(&mut inner) {
100                if part.kind() == "Name" {
101                    return Some(&content[part.byte_range()]);
102                }
103            }
104        }
105    }
106    None
107}
108
109/// Extract key attributes for the signature.
110fn extract_key_attributes(node: &Node, content: &str) -> Option<String> {
111    let mut cursor = node.walk();
112    for child in node.children(&mut cursor) {
113        if child.kind() == "STag" || child.kind() == "EmptyElemTag" {
114            let mut parts = Vec::new();
115            let mut inner = child.walk();
116            for attr in child.children(&mut inner) {
117                if attr.kind() == "Attribute" {
118                    let mut attr_cursor = attr.walk();
119                    let mut attr_name = None;
120                    let mut attr_val = None;
121                    for part in attr.children(&mut attr_cursor) {
122                        if part.kind() == "Name" {
123                            attr_name = Some(&content[part.byte_range()]);
124                        } else if part.kind() == "AttValue" {
125                            attr_val = Some(&content[part.byte_range()]);
126                        }
127                    }
128                    if let (Some(name), Some(val)) = (attr_name, attr_val)
129                        && (name == "id" || name == "class" || name == "name")
130                    {
131                        parts.push(format!("{}={}", name, val));
132                    }
133                }
134            }
135            if !parts.is_empty() {
136                return Some(parts.join(" "));
137            }
138        }
139    }
140    None
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Node kinds passed to the audit as documented-unused.
    // NOTE(review): presumably grammar node kinds this module intentionally
    // ignores — confirm against `validate_unused_kinds_audit`.
    #[rustfmt::skip]
    const DOCUMENTED_UNUSED: &[&str] = &[
        "Enumeration", "NotationType", "StringType", "TokenizedType",
        "doctypedecl",
    ];

    #[test]
    fn unused_node_kinds_audit() {
        let outcome = validate_unused_kinds_audit(&Xml, DOCUMENTED_UNUSED);
        outcome.expect("XML unused node kinds audit failed");
    }
}