rumdl_lib/utils/
kramdown_utils.rs

1//! Utilities for handling Kramdown-specific syntax
2//!
3//! Kramdown is a superset of Markdown that adds additional features like
4//! Inline Attribute Lists (IAL) for adding attributes to elements.
5
6use regex::Regex;
7use std::sync::LazyLock;
8
9use super::is_definition_list_item;
10
11/// Pattern for Kramdown span IAL: text{:.class #id key="value"}
12static SPAN_IAL_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{[:\.#][^}]*\}$").unwrap());
13
14/// Pattern for Kramdown extensions opening: {::comment}, {::nomarkdown}, etc.
15static EXTENSION_OPEN_PATTERN: LazyLock<Regex> =
16    LazyLock::new(|| Regex::new(r"^\s*\{::([a-z]+)(?:\s+[^}]*)?\}\s*$").unwrap());
17
18/// Pattern for Kramdown extensions closing: {:/comment}, {:/}, etc.
19static EXTENSION_CLOSE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\{:/([a-z]+)?\}\s*$").unwrap());
20
21/// Pattern for Kramdown options: {::options key="value" /}
22static OPTIONS_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\{::options\s+[^}]+/\}\s*$").unwrap());
23
24/// Pattern for footnote references: [^footnote]
25static FOOTNOTE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[\^[a-zA-Z0-9_\-]+\]").unwrap());
26
27/// Pattern for footnote definitions: [^footnote]: definition
28static FOOTNOTE_DEF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\[\^[a-zA-Z0-9_\-]+\]:").unwrap());
29
30/// Pattern for abbreviations: *[HTML]: HyperText Markup Language
31static ABBREVIATION_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\*\[[^\]]+\]:").unwrap());
32
33/// Pattern for math blocks: $$ or $
34static MATH_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\$\$").unwrap());
35static MATH_INLINE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$[^$]+\$").unwrap());
36
/// Report whether `line` is a Kramdown block IAL (Inline Attribute List)
///
/// A block IAL sits on a line of its own and attaches attributes to a block element.
/// After surrounding whitespace is ignored, the line must be wrapped in `{` … `}` and
/// the first character inside the braces must be `:`, `#` or `.`:
/// - `{:.class}` - CSS class
/// - `{:#id}` - Element ID
/// - `{:attribute="value"}` - Generic attributes
/// - `{:.class #id attribute="value"}` - Combinations
///
/// Only the opening marker is inspected; the attribute list itself is not validated.
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::kramdown_utils::is_kramdown_block_attribute;
///
/// assert!(is_kramdown_block_attribute("{:.wrap}"));
/// assert!(is_kramdown_block_attribute("{:#my-id}"));
/// assert!(is_kramdown_block_attribute("{:.class #id}"));
/// assert!(is_kramdown_block_attribute("{:style=\"color: red\"}"));
///
/// assert!(!is_kramdown_block_attribute("{just text}"));
/// assert!(!is_kramdown_block_attribute("{}"));
/// assert!(!is_kramdown_block_attribute("{"));
/// ```
pub fn is_kramdown_block_attribute(line: &str) -> bool {
    // Peel off the enclosing braces; anything not wrapped in `{…}` is rejected outright.
    let Some(inner) = line
        .trim()
        .strip_prefix('{')
        .and_then(|rest| rest.strip_suffix('}'))
    else {
        return false;
    };

    // An empty `{}` has no marker character, so it falls through to `false` here.
    matches!(inner.chars().next(), Some(':' | '#' | '.'))
}
72
73/// Check if text ends with a Kramdown span IAL (inline attribute)
74///
75/// # Examples
76/// ```
77/// use rumdl_lib::utils::kramdown_utils::has_span_ial;
78///
79/// assert!(has_span_ial("*emphasized*{:.highlight}"));
80/// assert!(has_span_ial("[link](url){:target=\"_blank\"}"));
81/// assert!(!has_span_ial("regular text"));
82/// ```
83pub fn has_span_ial(text: &str) -> bool {
84    SPAN_IAL_PATTERN.is_match(text.trim())
85}
86
87/// Remove span IAL from text if present
88pub fn remove_span_ial(text: &str) -> &str {
89    if let Some(captures) = SPAN_IAL_PATTERN.find(text) {
90        &text[..captures.start()]
91    } else {
92        text
93    }
94}
95
96/// Check if a line is a Kramdown extension opening tag
97///
98/// Extensions include: comment, nomarkdown, options
99pub fn is_kramdown_extension_open(line: &str) -> bool {
100    EXTENSION_OPEN_PATTERN.is_match(line)
101}
102
103/// Check if a line is a Kramdown extension closing tag
104pub fn is_kramdown_extension_close(line: &str) -> bool {
105    EXTENSION_CLOSE_PATTERN.is_match(line)
106}
107
108/// Check if a line is a Kramdown options directive
109pub fn is_kramdown_options(line: &str) -> bool {
110    OPTIONS_PATTERN.is_match(line)
111}
112
113/// Check if a line is a Kramdown extension (any type)
114pub fn is_kramdown_extension(line: &str) -> bool {
115    is_kramdown_extension_open(line) || is_kramdown_extension_close(line) || is_kramdown_options(line)
116}
117
/// Report whether `line` is an End-of-Block (EOB) marker
///
/// In Kramdown a line holding nothing but `^` terminates the current block.
/// Whitespace around the caret is ignored.
pub fn is_eob_marker(line: &str) -> bool {
    matches!(line.trim(), "^")
}
124
125/// Check if text contains a footnote reference
126pub fn has_footnote_reference(text: &str) -> bool {
127    FOOTNOTE_REF_PATTERN.is_match(text)
128}
129
130/// Check if a line is a footnote definition
131pub fn is_footnote_definition(line: &str) -> bool {
132    FOOTNOTE_DEF_PATTERN.is_match(line.trim_start())
133}
134
135/// Check if a line is an abbreviation definition
136pub fn is_abbreviation_definition(line: &str) -> bool {
137    ABBREVIATION_PATTERN.is_match(line.trim_start())
138}
139
140/// Check if a line starts a math block
141pub fn is_math_block_delimiter(line: &str) -> bool {
142    let trimmed = line.trim();
143    trimmed == "$$" || MATH_BLOCK_PATTERN.is_match(trimmed)
144}
145
146/// Check if text contains inline math
147pub fn has_inline_math(text: &str) -> bool {
148    MATH_INLINE_PATTERN.is_match(text)
149}
150
151/// Check if a line contains any Kramdown-specific syntax
152pub fn has_kramdown_syntax(line: &str) -> bool {
153    is_kramdown_block_attribute(line)
154        || has_span_ial(line)
155        || is_kramdown_extension(line)
156        || is_eob_marker(line)
157        || is_footnote_definition(line)
158        || is_abbreviation_definition(line)
159        || is_math_block_delimiter(line)
160        || is_definition_list_item(line)
161        || has_footnote_reference(line)
162        || has_inline_math(line)
163}
164
/// Generate a heading anchor ID following kramdown's algorithm
///
/// The steps, in the order they are applied:
/// 1. Trim surrounding whitespace.
/// 2. Drop every character that is not an ASCII letter, ASCII digit, space or
///    hyphen (so `&`, `>`, `:`, `_`, accented letters, etc. are removed entirely).
/// 3. Drop everything before the first ASCII letter.
/// 4. Consolidate each run of hyphens that was present in the heading:
///    a run of 1 or 4 becomes `-`, an even run of 6 or more becomes `--`,
///    and a run of any other length is removed.
/// 5. Turn every space into a hyphen (these are NOT consolidated), lowercase
///    letters, and keep digits.
/// 6. If the heading contains no ASCII letter at all, return `"section"`.
///
/// Sequences such as ` & `, ` > ` and `-->` need no special handling: once the
/// symbol has been dropped in step 2, the surrounding spaces/hyphens already
/// produce the expected `--`. Trailing hyphens are preserved.
///
/// The hyphen rules follow the behavior the original implementation recorded
/// for the kramdown 2.5.1 Ruby gem.
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::kramdown_utils::heading_to_fragment;
///
/// assert_eq!(heading_to_fragment("Hello World"), "hello-world");
/// assert_eq!(heading_to_fragment("1. Introduction"), "introduction");
/// assert_eq!(heading_to_fragment("Cats & Dogs"), "cats--dogs");
/// assert_eq!(heading_to_fragment("123"), "section");
/// ```
pub fn heading_to_fragment(heading: &str) -> String {
    const FALLBACK: &str = "section";

    let mut kept = heading
        .trim()
        .chars()
        // Step 2: only ASCII letters, digits, spaces and hyphens survive.
        .filter(|c| c.is_ascii_alphanumeric() || *c == ' ' || *c == '-')
        // Step 3: the ID must start at the first letter.
        .skip_while(|c| !c.is_ascii_alphabetic())
        .peekable();

    // Nothing left means the heading had no ASCII letter.
    if kept.peek().is_none() {
        return FALLBACK.to_string();
    }

    let mut fragment = String::with_capacity(heading.len());
    while let Some(c) = kept.next() {
        match c {
            '-' => {
                // Measure the whole run of hyphens before deciding what it becomes.
                let mut run_len = 1usize;
                while kept.next_if_eq(&'-').is_some() {
                    run_len += 1;
                }

                let replacement = match run_len {
                    1 | 4 => "-",
                    n if n >= 6 && n % 2 == 0 => "--",
                    // 2, 3, 5 and every odd run of 7 or more vanish.
                    _ => "",
                };
                fragment.push_str(replacement);
            }
            // Spaces map one-to-one onto hyphens; no consolidation here.
            ' ' => fragment.push('-'),
            // Letters are lowercased; digits pass through unchanged.
            _ => fragment.push(c.to_ascii_lowercase()),
        }
    }

    fragment
}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every sample is recognised as a block IAL.
    fn assert_all_accepted(samples: &[&str]) {
        for sample in samples {
            assert!(
                is_kramdown_block_attribute(sample),
                "expected a block IAL: {sample:?}"
            );
        }
    }

    /// Assert that no sample is recognised as a block IAL.
    fn assert_all_rejected(samples: &[&str]) {
        for sample in samples {
            assert!(
                !is_kramdown_block_attribute(sample),
                "unexpected block IAL: {sample:?}"
            );
        }
    }

    #[test]
    fn test_kramdown_class_attributes() {
        assert_all_accepted(&["{:.wrap}", "{:.class-name}", "{:.multiple .classes}"]);
    }

    #[test]
    fn test_kramdown_id_attributes() {
        assert_all_accepted(&["{:#my-id}", "{:#section-1}"]);
    }

    #[test]
    fn test_kramdown_generic_attributes() {
        assert_all_accepted(&["{:style=\"color: red\"}", "{:data-value=\"123\"}"]);
    }

    #[test]
    fn test_kramdown_combined_attributes() {
        assert_all_accepted(&[
            "{:.class #id}",
            "{:#id .class style=\"color: blue\"}",
            "{:.wrap #my-code .highlight}",
        ]);
    }

    #[test]
    fn test_non_kramdown_braces() {
        assert_all_rejected(&["{just some text}", "{not kramdown}", "{ spaces }"]);
    }

    #[test]
    fn test_edge_cases() {
        assert_all_rejected(&["{}", "{", "}", "", "not braces"]);
    }

    #[test]
    fn test_whitespace_handling() {
        assert_all_accepted(&["  {:.wrap}  ", "\t{:#id}\t", " {:.class #id} "]);
    }
}
342}