Skip to main content

panache_parser/parser/utils/
attributes.rs

//! Parsing for Pandoc-style attributes: {#id .class key=value}
//!
//! Attributes can appear after headings, fenced code blocks, fenced divs, etc.
//! Syntax: {#identifier .class1 .class2 key1=val1 key2="val2"}
//!
//! Rules:
//! - Surrounded by { }
//! - Identifier: #id (optional, only first one counts)
//! - Classes: .class (can have multiple)
//! - Key-value pairs: key=value or key="value" or key='value' (can have multiple)
//! - Whitespace flexible between items
12
13use crate::syntax::SyntaxKind;
14use rowan::GreenNodeBuilder;
15
/// Parsed contents of a `{#id .class key=value}` attribute block.
///
/// `Default` yields a block with no identifier, no classes and no key/value
/// pairs.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AttributeBlock {
    /// The identifier written as `#id`, without the leading `#`.
    pub identifier: Option<String>,
    /// Classes written as `.class`, without the leading `.`, in source order.
    /// Raw-format specifiers written as `=format` are stored here too and
    /// keep their leading `=`.
    pub classes: Vec<String>,
    /// `key=value` pairs in source order; values are stored without their
    /// surrounding quotes.
    pub key_values: Vec<(String, String)>,
}
22
23/// Try to parse an attribute block from the end of a string
24/// Returns: (attribute_block, text_before_attributes)
25pub fn try_parse_trailing_attributes(text: &str) -> Option<(AttributeBlock, &str)> {
26    let (attrs, before, _) = try_parse_trailing_attributes_with_pos(text)?;
27    Some((attrs, before))
28}
29
30/// Try to parse an attribute block from the end of a string.
31/// Returns: (attribute_block, text_before_attributes, open_brace_position_in_trimmed_text)
32pub fn try_parse_trailing_attributes_with_pos(text: &str) -> Option<(AttributeBlock, &str, usize)> {
33    let trimmed = text.trim_end();
34
35    // Must end with }
36    if !trimmed.ends_with('}') {
37        return None;
38    }
39
40    // Find matching opening brace for the trailing attribute block, accounting
41    // for braces inside quoted attribute values.
42    let open_brace = find_matching_open_brace_for_trailing_block(trimmed)?;
43
44    // Check if this is a bracketed span like [text]{.class} rather than a heading attribute
45    // If the { is immediately after ] (with optional whitespace), this should be parsed as a span
46    let before_brace = &trimmed[..open_brace];
47    if before_brace.trim_end().ends_with(']') {
48        log::trace!("Skipping attribute parsing for bracketed span: {}", text);
49        return None;
50    }
51
52    // Parse the content between { and }
53    let attr_content = &trimmed[open_brace + 1..trimmed.len() - 1];
54    let attr_block = parse_attribute_content(attr_content)?;
55
56    // Get text before attributes (trim trailing whitespace)
57    let before_attrs = trimmed[..open_brace].trim_end();
58
59    Some((attr_block, before_attrs, open_brace))
60}
61
/// Find the byte offset of the `{` that pairs with the final `}` of `text`.
///
/// The text is scanned left to right while tracking brace nesting. Inside
/// braces, single- or double-quoted runs are skipped (a backslash escapes the
/// next character within a quoted run), so braces inside quoted attribute
/// values do not affect nesting.
///
/// Quote characters outside of any brace are ordinary text. An apostrophe in
/// `Don't stop {#id}` therefore does not open a quoted run that would swallow
/// the attribute block.
///
/// Returns `None` when `text` does not end with `}`, when a `}` has no
/// matching `{`, when a `{` is never closed, or when a quoted run inside
/// braces is never terminated.
fn find_matching_open_brace_for_trailing_block(text: &str) -> Option<usize> {
    if !text.ends_with('}') {
        return None;
    }

    // `}` is a single byte, so the final brace sits at `len - 1`.
    let last_idx = text.len() - 1;
    let mut open_braces: Vec<usize> = Vec::new();
    let mut in_quote: Option<char> = None;
    let mut escaped = false;
    let mut end_brace_open = None;

    for (idx, ch) in text.char_indices() {
        if let Some(quote) = in_quote {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == quote {
                in_quote = None;
            }
            continue;
        }

        match ch {
            // Quotes only delimit attribute values, so they are significant
            // only while at least one brace is open.
            '\'' | '"' if !open_braces.is_empty() => in_quote = Some(ch),
            '{' => open_braces.push(idx),
            '}' => {
                let open = open_braces.pop()?;
                if idx == last_idx {
                    end_brace_open = Some(open);
                }
            }
            _ => {}
        }
    }

    if in_quote.is_some() || !open_braces.is_empty() {
        return None;
    }

    end_brace_open
}
107
108/// Parse the content inside the attribute braces
109pub fn parse_attribute_content(content: &str) -> Option<AttributeBlock> {
110    let mut identifier = None;
111    let mut classes = Vec::new();
112    let mut key_values = Vec::new();
113
114    let content = content.trim();
115    if content.is_empty() {
116        return None; // Empty {} is not valid
117    }
118
119    let mut pos = 0;
120    let bytes = content.as_bytes();
121
122    while pos < bytes.len() {
123        // Skip whitespace
124        while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
125            pos += 1;
126        }
127
128        if pos >= bytes.len() {
129            break;
130        }
131
132        // Check what kind of attribute this is
133        if bytes[pos] == b'=' {
134            // Special case: {=format} for raw attributes
135            // This is treated as a class ".=format" for compatibility
136            pos += 1; // Skip =
137            let start = pos;
138            while pos < bytes.len() && !bytes[pos].is_ascii_whitespace() && bytes[pos] != b'}' {
139                pos += 1;
140            }
141            if pos > start {
142                // Store as "=format" class (with the = prefix)
143                classes.push(format!("={}", &content[start..pos]));
144            }
145        } else if bytes[pos] == b'#' {
146            // Identifier (only take first one)
147            if identifier.is_none() {
148                pos += 1; // Skip #
149                let start = pos;
150                while pos < bytes.len() && !bytes[pos].is_ascii_whitespace() && bytes[pos] != b'}' {
151                    pos += 1;
152                }
153                if pos > start {
154                    identifier = Some(content[start..pos].to_string());
155                }
156            } else {
157                // Skip duplicate identifiers
158                pos += 1;
159                while pos < bytes.len() && !bytes[pos].is_ascii_whitespace() && bytes[pos] != b'}' {
160                    pos += 1;
161                }
162            }
163        } else if bytes[pos] == b'.' {
164            // Class
165            pos += 1; // Skip .
166            let start = pos;
167            while pos < bytes.len() && !bytes[pos].is_ascii_whitespace() && bytes[pos] != b'}' {
168                pos += 1;
169            }
170            if pos > start {
171                classes.push(content[start..pos].to_string());
172            }
173        } else {
174            // Key-value pair
175            let key_start = pos;
176            while pos < bytes.len() && bytes[pos] != b'=' && !bytes[pos].is_ascii_whitespace() {
177                pos += 1;
178            }
179
180            if pos >= bytes.len() || bytes[pos] != b'=' {
181                // Not a valid key=value, skip this token
182                while pos < bytes.len() && !bytes[pos].is_ascii_whitespace() {
183                    pos += 1;
184                }
185                continue;
186            }
187
188            let key = content[key_start..pos].to_string();
189            pos += 1; // Skip =
190
191            // Parse value (may be quoted)
192            let value = if pos < bytes.len() && (bytes[pos] == b'"' || bytes[pos] == b'\'') {
193                let quote = bytes[pos];
194                pos += 1; // Skip opening quote
195                let val_start = pos;
196                while pos < bytes.len() && bytes[pos] != quote {
197                    pos += 1;
198                }
199                let val = content[val_start..pos].to_string();
200                if pos < bytes.len() {
201                    pos += 1; // Skip closing quote
202                }
203                val
204            } else {
205                // Unquoted value
206                let val_start = pos;
207                while pos < bytes.len() && !bytes[pos].is_ascii_whitespace() && bytes[pos] != b'}' {
208                    pos += 1;
209                }
210                content[val_start..pos].to_string()
211            };
212
213            if !key.is_empty() {
214                key_values.push((key, value));
215            }
216        }
217    }
218
219    // At least one attribute must be present
220    if identifier.is_none() && classes.is_empty() && key_values.is_empty() {
221        return None;
222    }
223
224    Some(AttributeBlock {
225        identifier,
226        classes,
227        key_values,
228    })
229}
230
231/// Emit attribute block as AST nodes
232pub fn emit_attributes(builder: &mut GreenNodeBuilder, attrs: &AttributeBlock) {
233    builder.start_node(SyntaxKind::ATTRIBUTE.into());
234
235    // Build the attribute string to emit
236    let mut attr_str = String::from("{");
237
238    if let Some(ref id) = attrs.identifier {
239        attr_str.push('#');
240        attr_str.push_str(id);
241    }
242
243    for class in &attrs.classes {
244        if attr_str.len() > 1 {
245            attr_str.push(' ');
246        }
247        // Special case: if class starts with =, it's a raw format specifier
248        // Emit as {=format} not {.=format}
249        if class.starts_with('=') {
250            attr_str.push_str(class);
251        } else {
252            attr_str.push('.');
253            attr_str.push_str(class);
254        }
255    }
256
257    for (key, value) in &attrs.key_values {
258        if attr_str.len() > 1 {
259            attr_str.push(' ');
260        }
261        attr_str.push_str(key);
262        attr_str.push('=');
263
264        // Always quote attribute values to match Pandoc's behavior
265        attr_str.push('"');
266        attr_str.push_str(&value.replace('"', "\\\""));
267        attr_str.push('"');
268    }
269
270    attr_str.push('}');
271
272    builder.token(SyntaxKind::ATTRIBUTE.into(), &attr_str);
273    builder.finish_node();
274}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `text`, failing the test when no trailing attribute block is found.
    fn parse_ok(text: &str) -> (AttributeBlock, &str) {
        match try_parse_trailing_attributes(text) {
            Some(parsed) => parsed,
            None => panic!("expected trailing attributes in {:?}", text),
        }
    }

    /// Build an owned key/value pair for comparisons.
    fn kv(key: &str, value: &str) -> (String, String) {
        (key.to_string(), value.to_string())
    }

    #[test]
    fn test_simple_id() {
        let (attrs, before) = parse_ok("Heading {#my-id}");
        assert_eq!(before, "Heading");
        assert_eq!(attrs.identifier, Some("my-id".to_string()));
        assert!(attrs.classes.is_empty());
        assert!(attrs.key_values.is_empty());
    }

    #[test]
    fn test_single_class() {
        let (attrs, _) = parse_ok("Text {.myclass}");
        assert_eq!(attrs.classes, vec!["myclass"]);
    }

    #[test]
    fn test_multiple_classes() {
        let (attrs, _) = parse_ok("Text {.class1 .class2 .class3}");
        assert_eq!(attrs.classes, vec!["class1", "class2", "class3"]);
    }

    #[test]
    fn test_key_value_unquoted() {
        let (attrs, _) = parse_ok("Text {key=value}");
        assert_eq!(attrs.key_values, vec![kv("key", "value")]);
    }

    #[test]
    fn test_key_value_quoted() {
        let (attrs, _) = parse_ok("Text {key=\"value with spaces\"}");
        assert_eq!(attrs.key_values, vec![kv("key", "value with spaces")]);
    }

    #[test]
    fn test_full_attributes() {
        let (attrs, before) = parse_ok("Heading {#id .class1 .class2 key1=val1 key2=\"val 2\"}");
        assert_eq!(before, "Heading");
        assert_eq!(attrs.identifier, Some("id".to_string()));
        assert_eq!(attrs.classes, vec!["class1", "class2"]);
        assert_eq!(attrs.key_values.len(), 2);
        assert_eq!(attrs.key_values[0], kv("key1", "val1"));
        assert_eq!(attrs.key_values[1], kv("key2", "val 2"));
    }

    #[test]
    fn test_trailing_attributes_with_shortcode_in_quoted_value() {
        let text = "Slide Title {background-image='{{< placeholder 100 100 >}}' background-size=\"100px\"}";
        let (attrs, before) = parse_ok(text);
        assert_eq!(before, "Slide Title");
        assert_eq!(attrs.key_values.len(), 2);
        assert_eq!(
            attrs.key_values[0],
            kv("background-image", "{{< placeholder 100 100 >}}")
        );
        assert_eq!(attrs.key_values[1], kv("background-size", "100px"));
    }

    #[test]
    fn test_no_attributes() {
        assert!(try_parse_trailing_attributes("Heading with no attributes").is_none());
    }

    #[test]
    fn test_empty_braces() {
        assert!(try_parse_trailing_attributes("Heading {}").is_none());
    }

    #[test]
    fn test_only_first_id_counts() {
        let (attrs, _) = parse_ok("Text {#id1 #id2}");
        assert_eq!(attrs.identifier, Some("id1".to_string()));
    }

    #[test]
    fn test_whitespace_handling() {
        let (attrs, _) = parse_ok("Text {  #id   .class   key=val  }");
        assert_eq!(attrs.identifier, Some("id".to_string()));
        assert_eq!(attrs.classes, vec!["class"]);
        assert_eq!(attrs.key_values, vec![kv("key", "val")]);
    }

    #[test]
    fn test_trailing_whitespace_before_attrs() {
        let (_, before) = parse_ok("Heading   {#id}");
        assert_eq!(before, "Heading");
    }
}