rumdl_lib/utils/
kramdown_utils.rs

1//! Utilities for handling Kramdown-specific syntax
2//!
3//! Kramdown is a superset of Markdown that adds additional features like
4//! Inline Attribute Lists (IAL) for adding attributes to elements.
5
6use lazy_static::lazy_static;
7use regex::Regex;
8
9lazy_static! {
10    /// Pattern for Kramdown span IAL: text{:.class #id key="value"}
11    static ref SPAN_IAL_PATTERN: Regex = Regex::new(r"\{[:\.#][^}]*\}$").unwrap();
12
13    /// Pattern for Kramdown extensions opening: {::comment}, {::nomarkdown}, etc.
14    static ref EXTENSION_OPEN_PATTERN: Regex = Regex::new(r"^\s*\{::([a-z]+)(?:\s+[^}]*)?\}\s*$").unwrap();
15
16    /// Pattern for Kramdown extensions closing: {:/comment}, {:/}, etc.
17    static ref EXTENSION_CLOSE_PATTERN: Regex = Regex::new(r"^\s*\{:/([a-z]+)?\}\s*$").unwrap();
18
19    /// Pattern for Kramdown options: {::options key="value" /}
20    static ref OPTIONS_PATTERN: Regex = Regex::new(r"^\s*\{::options\s+[^}]+/\}\s*$").unwrap();
21
22    /// Pattern for footnote references: [^footnote]
23    static ref FOOTNOTE_REF_PATTERN: Regex = Regex::new(r"\[\^[a-zA-Z0-9_\-]+\]").unwrap();
24
25    /// Pattern for footnote definitions: [^footnote]: definition
26    static ref FOOTNOTE_DEF_PATTERN: Regex = Regex::new(r"^\[\^[a-zA-Z0-9_\-]+\]:").unwrap();
27
28    /// Pattern for abbreviations: *[HTML]: HyperText Markup Language
29    static ref ABBREVIATION_PATTERN: Regex = Regex::new(r"^\*\[[^\]]+\]:").unwrap();
30
31    /// Pattern for math blocks: $$ or $
32    static ref MATH_BLOCK_PATTERN: Regex = Regex::new(r"^\$\$").unwrap();
33    static ref MATH_INLINE_PATTERN: Regex = Regex::new(r"\$[^$]+\$").unwrap();
34}
35
/// Report whether a line consists solely of a Kramdown block IAL (Inline Attribute List).
///
/// Surrounding whitespace is ignored. The remaining text must be wrapped in
/// `{` ... `}`, be at least three bytes long, and have `:`, `#` or `.` as the
/// character right after the opening brace. Typical forms:
/// - `{:.class}` - CSS class
/// - `{:#id}` - Element ID
/// - `{:attribute="value"}` - Generic attributes
/// - `{:.class #id attribute="value"}` - Combinations
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::kramdown_utils::is_kramdown_block_attribute;
///
/// assert!(is_kramdown_block_attribute("{:.wrap}"));
/// assert!(is_kramdown_block_attribute("{:#my-id}"));
/// assert!(is_kramdown_block_attribute("{:.class #id}"));
/// assert!(is_kramdown_block_attribute("{:style=\"color: red\"}"));
///
/// assert!(!is_kramdown_block_attribute("{just text}"));
/// assert!(!is_kramdown_block_attribute("{}"));
/// assert!(!is_kramdown_block_attribute("{"));
/// ```
pub fn is_kramdown_block_attribute(line: &str) -> bool {
    let candidate = line.trim();

    // Anything shorter than `{x}` or not closed by a brace cannot qualify.
    if candidate.len() < 3 || !candidate.ends_with('}') {
        return false;
    }

    // The character immediately after the opening brace decides the outcome.
    candidate
        .strip_prefix('{')
        .and_then(|rest| rest.chars().next())
        .is_some_and(|marker| matches!(marker, ':' | '#' | '.'))
}
71
72/// Check if text ends with a Kramdown span IAL (inline attribute)
73///
74/// # Examples
75/// ```
76/// use rumdl_lib::utils::kramdown_utils::has_span_ial;
77///
78/// assert!(has_span_ial("*emphasized*{:.highlight}"));
79/// assert!(has_span_ial("[link](url){:target=\"_blank\"}"));
80/// assert!(!has_span_ial("regular text"));
81/// ```
82pub fn has_span_ial(text: &str) -> bool {
83    SPAN_IAL_PATTERN.is_match(text.trim())
84}
85
86/// Remove span IAL from text if present
87pub fn remove_span_ial(text: &str) -> &str {
88    if let Some(captures) = SPAN_IAL_PATTERN.find(text) {
89        &text[..captures.start()]
90    } else {
91        text
92    }
93}
94
95/// Check if a line is a Kramdown extension opening tag
96///
97/// Extensions include: comment, nomarkdown, options
98pub fn is_kramdown_extension_open(line: &str) -> bool {
99    EXTENSION_OPEN_PATTERN.is_match(line)
100}
101
102/// Check if a line is a Kramdown extension closing tag
103pub fn is_kramdown_extension_close(line: &str) -> bool {
104    EXTENSION_CLOSE_PATTERN.is_match(line)
105}
106
107/// Check if a line is a Kramdown options directive
108pub fn is_kramdown_options(line: &str) -> bool {
109    OPTIONS_PATTERN.is_match(line)
110}
111
112/// Check if a line is a Kramdown extension (any type)
113pub fn is_kramdown_extension(line: &str) -> bool {
114    is_kramdown_extension_open(line) || is_kramdown_extension_close(line) || is_kramdown_options(line)
115}
116
/// Report whether the line is an End-of-Block (EOB) marker.
///
/// In Kramdown, a line whose only non-whitespace content is a single `^`
/// ends the current block.
pub fn is_eob_marker(line: &str) -> bool {
    matches!(line.trim(), "^")
}
123
124/// Check if text contains a footnote reference
125pub fn has_footnote_reference(text: &str) -> bool {
126    FOOTNOTE_REF_PATTERN.is_match(text)
127}
128
129/// Check if a line is a footnote definition
130pub fn is_footnote_definition(line: &str) -> bool {
131    FOOTNOTE_DEF_PATTERN.is_match(line.trim_start())
132}
133
134/// Check if a line is an abbreviation definition
135pub fn is_abbreviation_definition(line: &str) -> bool {
136    ABBREVIATION_PATTERN.is_match(line.trim_start())
137}
138
139/// Check if a line starts a math block
140pub fn is_math_block_delimiter(line: &str) -> bool {
141    let trimmed = line.trim();
142    trimmed == "$$" || MATH_BLOCK_PATTERN.is_match(trimmed)
143}
144
145/// Check if text contains inline math
146pub fn has_inline_math(text: &str) -> bool {
147    MATH_INLINE_PATTERN.is_match(text)
148}
149
/// Report whether the line is a definition list item.
///
/// Definition lists in Kramdown look like this:
/// ```text
/// Term
/// : Definition
/// ```
///
/// After leading whitespace is skipped, the line must start with `:` followed
/// by a whitespace character (a space, tab, etc.).
pub fn is_definition_list_item(line: &str) -> bool {
    let mut chars = line.trim_start().chars();
    // First character must be the colon marker, the second any whitespace.
    chars.next() == Some(':') && chars.next().is_some_and(char::is_whitespace)
}
162
163/// Check if a line contains any Kramdown-specific syntax
164pub fn has_kramdown_syntax(line: &str) -> bool {
165    is_kramdown_block_attribute(line)
166        || has_span_ial(line)
167        || is_kramdown_extension(line)
168        || is_eob_marker(line)
169        || is_footnote_definition(line)
170        || is_abbreviation_definition(line)
171        || is_math_block_delimiter(line)
172        || is_definition_list_item(line)
173        || has_footnote_reference(line)
174        || has_inline_math(line)
175}
176
/// Replacement text for a run of `len` consecutive hyphens taken from the heading.
///
/// Runs of length 1 or 4 collapse to `-`, even runs of length 6 or more collapse
/// to `--`, and every other length is removed entirely.
fn consolidate_hyphen_run(len: usize) -> &'static str {
    match len {
        1 | 4 => "-",
        n if n >= 6 && n % 2 == 0 => "--",
        _ => "",
    }
}

/// Generate a heading fragment (anchor ID) following kramdown-style rules.
///
/// The steps, in the order they take effect:
/// 1. Trim surrounding whitespace.
/// 2. Drop every character that is not an ASCII letter, ASCII digit, space or
///    hyphen. Punctuation, underscores and non-ASCII characters vanish entirely.
/// 3. Skip everything before the first ASCII letter.
/// 4. Lowercase letters, keep digits, and turn each space into a hyphen. Spaces
///    are never merged, so `"API & Usage"` becomes `"api--usage"` once the `&`
///    has been dropped.
/// 5. Rewrite each run of hyphens that was literally present in the heading
///    according to its length: 1 or 4 -> `-`, even lengths of 6 or more -> `--`,
///    any other length -> removed.
/// 6. If the heading contains no ASCII letter, return `"section"`.
///
/// Trailing hyphens are preserved.
///
/// NOTE: the hyphen-run table is reproduced unchanged from the previous
/// implementation, whose author recorded it as matching the kramdown 2.5.1 Ruby gem.
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::kramdown_utils::heading_to_fragment;
///
/// assert_eq!(heading_to_fragment("Hello World"), "hello-world");
/// assert_eq!(heading_to_fragment("1. Introduction"), "introduction");
/// assert_eq!(heading_to_fragment("123"), "section");
/// ```
pub fn heading_to_fragment(heading: &str) -> String {
    let mut chars = heading
        .trim()
        .chars()
        .filter(|&c| c.is_ascii_alphanumeric() || c == ' ' || c == '-')
        .skip_while(|c| !c.is_ascii_alphabetic())
        .peekable();

    let mut fragment = String::with_capacity(heading.len());
    while let Some(c) = chars.next() {
        match c {
            '-' => {
                // Measure the whole run of literal hyphens before deciding what to emit.
                let mut run_len = 1;
                while chars.next_if_eq(&'-').is_some() {
                    run_len += 1;
                }
                fragment.push_str(consolidate_hyphen_run(run_len));
            }
            // Hyphens produced from spaces are deliberately exempt from consolidation.
            ' ' => fragment.push('-'),
            // Only ASCII letters and digits remain; lowercasing leaves digits untouched.
            _ => fragment.push(c.to_ascii_lowercase()),
        }
    }

    // An empty fragment means the heading had no ASCII letter to start from.
    if fragment.is_empty() {
        "section".to_string()
    } else {
        fragment
    }
}
301
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every line is recognised as a Kramdown block attribute.
    fn assert_all_accepted(lines: &[&str]) {
        for line in lines {
            assert!(
                is_kramdown_block_attribute(line),
                "expected {line:?} to be accepted as a block attribute"
            );
        }
    }

    /// Assert that no line is recognised as a Kramdown block attribute.
    fn assert_all_rejected(lines: &[&str]) {
        for line in lines {
            assert!(
                !is_kramdown_block_attribute(line),
                "expected {line:?} to be rejected as a block attribute"
            );
        }
    }

    #[test]
    fn test_kramdown_class_attributes() {
        assert_all_accepted(&["{:.wrap}", "{:.class-name}", "{:.multiple .classes}"]);
    }

    #[test]
    fn test_kramdown_id_attributes() {
        assert_all_accepted(&["{:#my-id}", "{:#section-1}"]);
    }

    #[test]
    fn test_kramdown_generic_attributes() {
        assert_all_accepted(&["{:style=\"color: red\"}", "{:data-value=\"123\"}"]);
    }

    #[test]
    fn test_kramdown_combined_attributes() {
        assert_all_accepted(&[
            "{:.class #id}",
            "{:#id .class style=\"color: blue\"}",
            "{:.wrap #my-code .highlight}",
        ]);
    }

    #[test]
    fn test_non_kramdown_braces() {
        assert_all_rejected(&["{just some text}", "{not kramdown}", "{ spaces }"]);
    }

    #[test]
    fn test_edge_cases() {
        assert_all_rejected(&["{}", "{", "}", "", "not braces"]);
    }

    #[test]
    fn test_whitespace_handling() {
        assert_all_accepted(&["  {:.wrap}  ", "\t{:#id}\t", " {:.class #id} "]);
    }
}