Skip to main content

panache_parser/syntax/
attributes.rs

1use crate::parser::utils::attributes::{
2    AttributeBlock, parse_html_attribute_list, try_parse_trailing_attributes,
3};
4use crate::syntax::{AstNode, PanacheLanguage, SyntaxKind, SyntaxNode, SyntaxToken};
5
6#[derive(Debug, Clone, PartialEq, Eq, Hash)]
7pub struct AttributeNode(SyntaxNode);
8
9impl AstNode for AttributeNode {
10    type Language = PanacheLanguage;
11
12    fn can_cast(kind: SyntaxKind) -> bool {
13        matches!(
14            kind,
15            SyntaxKind::ATTRIBUTE | SyntaxKind::DIV_INFO | SyntaxKind::HTML_ATTRS
16        )
17    }
18
19    fn cast(node: SyntaxNode) -> Option<Self> {
20        Self::can_cast(node.kind()).then(|| AttributeNode(node))
21    }
22
23    fn syntax(&self) -> &SyntaxNode {
24        &self.0
25    }
26}
27
28impl AttributeNode {
29    /// Whether this node carries structured `ATTR_*` children. Only Pandoc
30    /// `{...}` attributes emitted by `emit_attribute_node` do; `DIV_INFO`,
31    /// `HTML_ATTRS`, and opaque fallbacks (MMD `[#id]` headers, raw-inline
32    /// `{=format}`, malformed bodies) keep a single inner text token and are
33    /// read via the reparse helpers below.
34    fn structured_id_token(&self) -> Option<SyntaxToken> {
35        self.0
36            .children_with_tokens()
37            .find(|el| el.kind() == SyntaxKind::ATTR_ID)
38            .and_then(|el| el.into_token())
39    }
40
41    fn has_structured_children(&self) -> bool {
42        self.0.children_with_tokens().any(|el| {
43            matches!(
44                el.kind(),
45                SyntaxKind::ATTR_ID | SyntaxKind::ATTR_CLASS | SyntaxKind::ATTR_KEY_VALUE
46            )
47        })
48    }
49
50    /// Reparse the opaque node text into an [`AttributeBlock`] (fallback path).
51    fn reparse(&self) -> Option<AttributeBlock> {
52        let text = self.0.text().to_string();
53        match self.0.kind() {
54            SyntaxKind::HTML_ATTRS => parse_html_attribute_list(&text),
55            _ => try_parse_trailing_attributes(&text).map(|(attrs, _)| attrs),
56        }
57    }
58
59    pub fn id(&self) -> Option<String> {
60        if self.has_structured_children() {
61            return self
62                .structured_id_token()
63                .map(|t| t.text().strip_prefix('#').unwrap_or(t.text()).to_string())
64                .filter(|id| !id.is_empty());
65        }
66        self.reparse()
67            .and_then(|attrs| attrs.identifier)
68            .filter(|id| !id.is_empty())
69    }
70
71    pub fn classes(&self) -> Vec<String> {
72        if self.has_structured_children() {
73            return self
74                .0
75                .children_with_tokens()
76                .filter(|el| el.kind() == SyntaxKind::ATTR_CLASS)
77                .filter_map(|el| el.into_token())
78                .map(|t| t.text().strip_prefix('.').unwrap_or(t.text()).to_string())
79                .collect();
80        }
81        self.reparse().map(|a| a.classes).unwrap_or_default()
82    }
83
84    pub fn key_values(&self) -> Vec<(String, String)> {
85        if self.has_structured_children() {
86            return self
87                .0
88                .children()
89                .filter(|n| n.kind() == SyntaxKind::ATTR_KEY_VALUE)
90                .map(|kv| {
91                    let key = child_token_text(&kv, SyntaxKind::ATTR_KEY).unwrap_or_default();
92                    let value = child_token_text(&kv, SyntaxKind::ATTR_VALUE)
93                        .map(|v| strip_value_quotes(&v))
94                        .unwrap_or_default();
95                    (key, value)
96                })
97                .collect();
98        }
99        self.reparse().map(|a| a.key_values).unwrap_or_default()
100    }
101
102    pub fn id_value_range(&self) -> Option<rowan::TextRange> {
103        if self.has_structured_children() {
104            // Precise inner-value range: the ATTR_ID token, minus its leading
105            // `#` for Pandoc `{...}` attributes. HTML_ATTRS ids are bare (no
106            // marker), so the whole token range is the value.
107            let tok = self.structured_id_token()?;
108            let r = tok.text_range();
109            let lead = if tok.text().starts_with('#') {
110                rowan::TextSize::from(1)
111            } else {
112                rowan::TextSize::from(0)
113            };
114            return Some(rowan::TextRange::new(r.start() + lead, r.end()));
115        }
116
117        let id = self.id()?;
118        let text = self.0.text().to_string();
119        let node_start: usize = self.0.text_range().start().into();
120        match self.0.kind() {
121            SyntaxKind::HTML_ATTRS => {
122                // Match `id=` followed by an optional quote and the id value.
123                // The salsa indexer uses this range for highlights / renames;
124                // a precise inner-value range is preferred over the full attr
125                // node range.
126                let marker = text.find("id")?;
127                let after_id = &text[marker + 2..];
128                let eq_off = after_id.bytes().position(|b| b == b'=')?;
129                let after_eq = &after_id[eq_off + 1..];
130                let (val_offset_in_after_eq, val_len) = match after_eq.bytes().next() {
131                    Some(b'"') | Some(b'\'') => (1, id.len()),
132                    _ => (0, id.len()),
133                };
134                let value_start_in_text = marker + 2 + eq_off + 1 + val_offset_in_after_eq;
135                let start = rowan::TextSize::from((node_start + value_start_in_text) as u32);
136                let end =
137                    rowan::TextSize::from((node_start + value_start_in_text + val_len) as u32);
138                Some(rowan::TextRange::new(start, end))
139            }
140            _ => {
141                let marker = text.find(&format!("#{}", id))?;
142                let start = rowan::TextSize::from((node_start + marker + 1) as u32);
143                let end = rowan::TextSize::from((node_start + marker + 1 + id.len()) as u32);
144                Some(rowan::TextRange::new(start, end))
145            }
146        }
147    }
148}
149
150/// Text of the first child token of `node` with the given kind.
151fn child_token_text(node: &SyntaxNode, kind: SyntaxKind) -> Option<String> {
152    node.children_with_tokens()
153        .find(|el| el.kind() == kind)
154        .and_then(|el| el.into_token())
155        .map(|t| t.text().to_string())
156}
157
/// Removes one matching pair of surrounding quotes (`"…"` or `'…'`) from an
/// attribute value.
///
/// Input without a matching pair (unquoted, mismatched quotes, or a lone
/// quote character) is returned unchanged.
fn strip_value_quotes(raw: &str) -> String {
    let double_quoted = raw
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));
    let unquoted = double_quoted.or_else(|| {
        raw.strip_prefix('\'')
            .and_then(|rest| rest.strip_suffix('\''))
    });
    unquoted.unwrap_or(raw).to_string()
}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// A fenced div's `{...}` info is structured into `ATTR_*` children, so
    /// id, classes, and key-values are read directly from the CST.
    #[test]
    fn attribute_node_extracts_div_info_id_and_range() {
        let options = crate::ParserOptions {
            flavor: crate::options::Flavor::RMarkdown,
            ..Default::default()
        };
        let root = crate::parse("::: {#mu .exercise k=v}\ntext\n:::\n", Some(options));
        let attr = root
            .descendants()
            .find_map(AttributeNode::cast)
            .expect("attribute node");

        assert_eq!(attr.syntax().kind(), SyntaxKind::DIV_INFO);
        assert!(attr.has_structured_children());
        assert_eq!(attr.id(), Some(String::from("mu")));
        assert_eq!(attr.classes(), ["exercise"]);
        assert_eq!(
            attr.key_values(),
            vec![(String::from("k"), String::from("v"))]
        );

        // The range comes from the ATTR_ID token and must cover only `mu`.
        let span = attr.id_value_range().expect("id range");
        let (lo, hi) = (usize::from(span.start()), usize::from(span.end()));
        let full_text = root.text().to_string();
        assert_eq!(&full_text[lo..hi], "mu");
    }

    /// A heading's trailing `{...}` attribute exposes id, classes, and a
    /// quoted key-value through the structured children.
    #[test]
    fn attribute_node_reads_structured_children() {
        let root = crate::parse("# H {#x .a .b k=\"v w\"}\n", None);
        let attr = root
            .descendants()
            .find_map(AttributeNode::cast)
            .expect("attribute node");

        assert_eq!(attr.id(), Some(String::from("x")));
        assert_eq!(attr.classes(), ["a", "b"]);
        assert_eq!(
            attr.key_values(),
            vec![(String::from("k"), String::from("v w"))]
        );

        let span = attr.id_value_range().expect("id range");
        let (lo, hi) = (usize::from(span.start()), usize::from(span.end()));
        let full_text = root.text().to_string();
        assert_eq!(&full_text[lo..hi], "x");
    }
}