Skip to main content

rumdl_lib/rules/
heading_utils.rs

1use crate::utils::regex_cache::get_cached_regex;
2use std::fmt;
3use std::str::FromStr;
4
// Matches a complete ATX heading line. Capture groups:
//   1 = leading whitespace, 2 = the opening run of 1-6 `#`,
//   3 = whitespace after the hashes (may be empty), 4 = heading text (cannot contain `#`),
//   5 = optional closing run of 1-6 `#`, which must be preceded by whitespace.
5const ATX_PATTERN_STR: &str = r"^(\s*)(#{1,6})(\s*)([^#\n]*?)(?:\s+(#{1,6}))?\s*$";
// Matches a line made only of `=` characters, with optional surrounding whitespace.
// Capture groups: 1 = leading whitespace, 2 = the `=` run, 3 = trailing whitespace.
6const SETEXT_HEADING_1_STR: &str = r"^(\s*)(=+)(\s*)$";
// Matches a line made only of `-` characters, with optional surrounding whitespace.
// Capture groups: 1 = leading whitespace, 2 = the `-` run, 3 = trailing whitespace.
7const SETEXT_HEADING_2_STR: &str = r"^(\s*)(-+)(\s*)$";
// Matches everything from a `<` up to and including the next `>`.
8const HTML_TAG_REGEX_STR: &str = r"<[^>]*>";
9
/// The heading syntaxes (and style policies) a Markdown heading can be written in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX heading: `# Heading`
    Atx,
    /// Closed ATX heading: `# Heading #`
    AtxClosed,
    /// Setext heading whose text is underlined with `=======`
    Setext1,
    /// Setext heading whose text is underlined with `-------`
    Setext2,
    /// Maintain consistency with the first heading style found
    Consistent,
    /// Setext for h1/h2, ATX for h3-h6
    SetextWithAtx,
    /// Setext for h1/h2, closed ATX for h3-h6
    SetextWithAtxClosed,
}
23
24impl fmt::Display for HeadingStyle {
25    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
26        let s = match self {
27            HeadingStyle::Atx => "atx",
28            HeadingStyle::AtxClosed => "atx-closed",
29            HeadingStyle::Setext1 => "setext1",
30            HeadingStyle::Setext2 => "setext2",
31            HeadingStyle::Consistent => "consistent",
32            HeadingStyle::SetextWithAtx => "setext-with-atx",
33            HeadingStyle::SetextWithAtxClosed => "setext-with-atx-closed",
34        };
35        write!(f, "{s}")
36    }
37}
38
39impl FromStr for HeadingStyle {
40    type Err = ();
41    fn from_str(s: &str) -> Result<Self, Self::Err> {
42        let normalized = s.trim().to_ascii_lowercase().replace('-', "_");
43        match normalized.as_str() {
44            "atx" => Ok(HeadingStyle::Atx),
45            "atx_closed" => Ok(HeadingStyle::AtxClosed),
46            "setext1" | "setext" => Ok(HeadingStyle::Setext1),
47            "setext2" => Ok(HeadingStyle::Setext2),
48            "consistent" => Ok(HeadingStyle::Consistent),
49            "setext_with_atx" => Ok(HeadingStyle::SetextWithAtx),
50            "setext_with_atx_closed" => Ok(HeadingStyle::SetextWithAtxClosed),
51            _ => Err(()),
52        }
53    }
54}
55
56/// Utility functions for working with Markdown headings
57pub struct HeadingUtils;
58
59impl HeadingUtils {
60    /// Convert a heading to a different style
61    pub fn convert_heading_style(text_content: &str, level: u32, style: HeadingStyle) -> String {
62        // Validate heading level
63        let level = level.clamp(1, 6);
64
65        if text_content.trim().is_empty() {
66            // Empty headings: ATX can be just `##`, Setext requires text so return empty
67            return match style {
68                HeadingStyle::Atx => "#".repeat(level as usize),
69                HeadingStyle::AtxClosed => {
70                    let hashes = "#".repeat(level as usize);
71                    format!("{hashes} {hashes}")
72                }
73                HeadingStyle::Setext1 | HeadingStyle::Setext2 => String::new(),
74                // These are meta-styles resolved before calling this function
75                HeadingStyle::Consistent | HeadingStyle::SetextWithAtx | HeadingStyle::SetextWithAtxClosed => {
76                    "#".repeat(level as usize)
77                }
78            };
79        }
80
81        let indentation = text_content
82            .chars()
83            .take_while(|c| c.is_whitespace())
84            .collect::<String>();
85        let text_content = text_content.trim();
86
87        match style {
88            HeadingStyle::Atx => {
89                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
90            }
91            HeadingStyle::AtxClosed => {
92                format!(
93                    "{}{} {} {}",
94                    indentation,
95                    "#".repeat(level as usize),
96                    text_content,
97                    "#".repeat(level as usize)
98                )
99            }
100            HeadingStyle::Setext1 | HeadingStyle::Setext2 => {
101                if level > 2 {
102                    // Fall back to ATX style for levels > 2
103                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
104                } else {
105                    let underline_char = if level == 1 || style == HeadingStyle::Setext1 {
106                        '='
107                    } else {
108                        '-'
109                    };
110                    let visible_length = text_content.chars().count();
111                    let underline_length = visible_length.max(1); // Ensure at least 1 underline char
112                    format!(
113                        "{}{}\n{}{}",
114                        indentation,
115                        text_content,
116                        indentation,
117                        underline_char.to_string().repeat(underline_length)
118                    )
119                }
120            }
121            HeadingStyle::Consistent => {
122                // For Consistent style, default to ATX as it's the most commonly used
123                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
124            }
125            HeadingStyle::SetextWithAtx => {
126                if level <= 2 {
127                    // Use Setext for h1/h2
128                    let underline_char = if level == 1 { '=' } else { '-' };
129                    let visible_length = text_content.chars().count();
130                    let underline_length = visible_length.max(1);
131                    format!(
132                        "{}{}\n{}{}",
133                        indentation,
134                        text_content,
135                        indentation,
136                        underline_char.to_string().repeat(underline_length)
137                    )
138                } else {
139                    // Use ATX for h3-h6
140                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
141                }
142            }
143            HeadingStyle::SetextWithAtxClosed => {
144                if level <= 2 {
145                    // Use Setext for h1/h2
146                    let underline_char = if level == 1 { '=' } else { '-' };
147                    let visible_length = text_content.chars().count();
148                    let underline_length = visible_length.max(1);
149                    format!(
150                        "{}{}\n{}{}",
151                        indentation,
152                        text_content,
153                        indentation,
154                        underline_char.to_string().repeat(underline_length)
155                    )
156                } else {
157                    // Use ATX closed for h3-h6
158                    format!(
159                        "{}{} {} {}",
160                        indentation,
161                        "#".repeat(level as usize),
162                        text_content,
163                        "#".repeat(level as usize)
164                    )
165                }
166            }
167        }
168    }
169
170    /// Convert a heading text to a valid ID for fragment links
171    pub fn heading_to_fragment(text: &str) -> String {
172        // Remove any HTML tags
173        let text_no_html =
174            get_cached_regex(HTML_TAG_REGEX_STR).map_or_else(|_| text.into(), |re| re.replace_all(text, ""));
175
176        // Convert to lowercase and trim
177        let text_lower = text_no_html.trim().to_lowercase();
178
179        // Replace spaces and punctuation with hyphens
180        let text_with_hyphens = text_lower
181            .chars()
182            .map(|c| if c.is_alphanumeric() { c } else { '-' })
183            .collect::<String>();
184
185        // Replace multiple consecutive hyphens with a single hyphen
186        let text_clean = text_with_hyphens
187            .split('-')
188            .filter(|s| !s.is_empty())
189            .collect::<Vec<_>>()
190            .join("-");
191
192        // Remove leading and trailing hyphens
193        text_clean.trim_matches('-').to_string()
194    }
195}
196
197/// Checks if a line is a heading
198#[inline]
199pub fn is_heading(line: &str) -> bool {
200    // Fast path checks first
201    let trimmed = line.trim();
202    if trimmed.is_empty() {
203        return false;
204    }
205
206    if trimmed.starts_with('#') {
207        // Check for ATX heading
208        get_cached_regex(ATX_PATTERN_STR)
209            .map(|re| re.is_match(line))
210            .unwrap_or(false)
211    } else {
212        // We can't tell for setext headings without looking at the next line
213        false
214    }
215}
216
217/// Checks if a line is a setext heading marker
218#[inline]
219pub fn is_setext_heading_marker(line: &str) -> bool {
220    get_cached_regex(SETEXT_HEADING_1_STR)
221        .map(|re| re.is_match(line))
222        .unwrap_or(false)
223        || get_cached_regex(SETEXT_HEADING_2_STR)
224            .map(|re| re.is_match(line))
225            .unwrap_or(false)
226}
227
228/// Get the heading level for a line
229#[inline]
230pub fn get_heading_level(lines: &[&str], index: usize) -> u32 {
231    if index >= lines.len() {
232        return 0;
233    }
234
235    let line = lines[index];
236
237    // Check for ATX style heading
238    if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
239        let hashes = captures.get(2).map_or("", |m| m.as_str());
240        return hashes.len() as u32;
241    }
242
243    // Check for setext style heading
244    if index < lines.len() - 1 {
245        let next_line = lines[index + 1];
246
247        if get_cached_regex(SETEXT_HEADING_1_STR)
248            .map(|re| re.is_match(next_line))
249            .unwrap_or(false)
250        {
251            return 1;
252        }
253
254        if get_cached_regex(SETEXT_HEADING_2_STR)
255            .map(|re| re.is_match(next_line))
256            .unwrap_or(false)
257        {
258            return 2;
259        }
260    }
261
262    0
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Every style paired with the name its `Display` impl is expected to print.
    const STYLE_NAMES: [(HeadingStyle, &str); 7] = [
        (HeadingStyle::Atx, "atx"),
        (HeadingStyle::AtxClosed, "atx-closed"),
        (HeadingStyle::Setext1, "setext1"),
        (HeadingStyle::Setext2, "setext2"),
        (HeadingStyle::Consistent, "consistent"),
        (HeadingStyle::SetextWithAtx, "setext-with-atx"),
        (HeadingStyle::SetextWithAtxClosed, "setext-with-atx-closed"),
    ];

    /// Checks `convert_heading_style` for each `(text, level, style, expected)` row.
    fn assert_conversions(cases: &[(&str, u32, HeadingStyle, &str)]) {
        for &(text, level, style, expected) in cases {
            assert_eq!(
                HeadingUtils::convert_heading_style(text, level, style),
                expected,
                "text={text:?} level={level} style={style}"
            );
        }
    }

    /// Checks `heading_to_fragment` for each `(input, expected)` row.
    fn assert_fragments(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(HeadingUtils::heading_to_fragment(input), expected, "input={input:?}");
        }
    }

    #[test]
    fn test_heading_style_conversion() {
        assert_conversions(&[
            ("Heading 1", 1, HeadingStyle::Atx, "# Heading 1"),
            ("Heading 2", 2, HeadingStyle::AtxClosed, "## Heading 2 ##"),
            ("Heading 1", 1, HeadingStyle::Setext1, "Heading 1\n========="),
            ("Heading 2", 2, HeadingStyle::Setext2, "Heading 2\n---------"),
        ]);
    }

    #[test]
    fn test_convert_heading_style_edge_cases() {
        assert_conversions(&[
            // Empty text: ATX headings produce just the hash marks (valid markdown)
            ("", 1, HeadingStyle::Atx, "#"),
            ("   ", 1, HeadingStyle::Atx, "#"),
            ("", 2, HeadingStyle::Atx, "##"),
            ("", 1, HeadingStyle::AtxClosed, "# #"),
            // Setext cannot represent empty headings, returns empty
            ("", 1, HeadingStyle::Setext1, ""),
            // Level clamping
            ("Text", 0, HeadingStyle::Atx, "# Text"),
            ("Text", 10, HeadingStyle::Atx, "###### Text"),
            // Setext with level > 2 falls back to ATX
            ("Text", 3, HeadingStyle::Setext1, "### Text"),
            // Preserve indentation
            ("  Text", 1, HeadingStyle::Atx, "  # Text"),
            ("  Text", 1, HeadingStyle::Setext1, "  Text\n  ===="),
            // Very short text for setext
            ("Hi", 1, HeadingStyle::Setext1, "Hi\n=="),
            // Underline length counts characters, not bytes
            ("Café", 1, HeadingStyle::Setext1, "Café\n===="),
        ]);
    }

    #[test]
    fn test_convert_heading_style_meta_styles() {
        assert_conversions(&[
            // Consistent defaults to ATX
            ("Title", 1, HeadingStyle::Consistent, "# Title"),
            ("", 3, HeadingStyle::Consistent, "###"),
            // Setext for h1/h2, ATX for h3-h6
            ("Title", 1, HeadingStyle::SetextWithAtx, "Title\n====="),
            ("Title", 2, HeadingStyle::SetextWithAtx, "Title\n-----"),
            ("Title", 3, HeadingStyle::SetextWithAtx, "### Title"),
            // Setext for h1/h2, closed ATX for h3-h6
            ("Title", 1, HeadingStyle::SetextWithAtxClosed, "Title\n====="),
            ("Title", 2, HeadingStyle::SetextWithAtxClosed, "Title\n-----"),
            ("Title", 3, HeadingStyle::SetextWithAtxClosed, "### Title ###"),
        ]);
    }

    #[test]
    fn test_heading_to_fragment() {
        assert_fragments(&[
            ("Simple Heading", "simple-heading"),
            ("Heading with Numbers 123", "heading-with-numbers-123"),
            ("Special!@#$%Characters", "special-characters"),
            ("  Trimmed  ", "trimmed"),
            ("Multiple   Spaces", "multiple-spaces"),
            ("Heading <em>with HTML</em>", "heading-with-html"),
            ("---Leading-Dashes---", "leading-dashes"),
            ("", ""),
        ]);
    }

    #[test]
    fn test_unicode_heading_fragments() {
        assert_fragments(&[("你好世界", "你好世界"), ("Café René", "café-rené")]);
    }

    #[test]
    fn test_module_level_functions() {
        // Test is_heading
        assert!(is_heading("# Heading"));
        assert!(is_heading("  ## Indented"));
        assert!(!is_heading("Not a heading"));
        assert!(!is_heading(""));

        // Test is_setext_heading_marker
        assert!(is_setext_heading_marker("========"));
        assert!(is_setext_heading_marker("--------"));
        assert!(is_setext_heading_marker("  ======"));
        assert!(!is_setext_heading_marker("# Heading"));
        assert!(is_setext_heading_marker("---")); // Three dashes is valid

        // Test get_heading_level
        let lines = vec!["# H1", "## H2", "### H3"];
        assert_eq!(get_heading_level(&lines, 0), 1);
        assert_eq!(get_heading_level(&lines, 1), 2);
        assert_eq!(get_heading_level(&lines, 2), 3);
        assert_eq!(get_heading_level(&lines, 10), 0);
    }

    #[test]
    fn test_get_heading_level_setext() {
        let lines = ["Title", "=====", "Sub", "-----", "plain"];
        let expected = [1, 0, 2, 0, 0];
        for (index, &level) in expected.iter().enumerate() {
            assert_eq!(get_heading_level(&lines, index), level, "index={index}");
        }

        // No lines at all: every index is out of bounds
        let empty: [&str; 0] = [];
        assert_eq!(get_heading_level(&empty, 0), 0);
    }

    #[test]
    fn test_heading_style_from_str() {
        let accepted = [
            ("atx", HeadingStyle::Atx),
            ("ATX", HeadingStyle::Atx),
            ("  atx  ", HeadingStyle::Atx),
            ("atx_closed", HeadingStyle::AtxClosed),
            ("atx-closed", HeadingStyle::AtxClosed),
            ("ATX-CLOSED", HeadingStyle::AtxClosed),
            ("setext1", HeadingStyle::Setext1),
            ("setext", HeadingStyle::Setext1),
            ("setext2", HeadingStyle::Setext2),
            ("consistent", HeadingStyle::Consistent),
            ("setext_with_atx", HeadingStyle::SetextWithAtx),
            ("setext-with-atx", HeadingStyle::SetextWithAtx),
            ("setext_with_atx_closed", HeadingStyle::SetextWithAtxClosed),
            ("setext-with-atx-closed", HeadingStyle::SetextWithAtxClosed),
        ];
        for (input, style) in accepted {
            assert_eq!(HeadingStyle::from_str(input), Ok(style), "input={input:?}");
        }

        assert_eq!(HeadingStyle::from_str("invalid"), Err(()));
        assert_eq!(HeadingStyle::from_str(""), Err(()));
    }

    #[test]
    fn test_heading_style_display() {
        for (style, name) in STYLE_NAMES {
            assert_eq!(style.to_string(), name);
        }
    }

    #[test]
    fn test_heading_style_display_round_trips() {
        // Whatever `Display` prints must parse back to the same style.
        for (style, name) in STYLE_NAMES {
            assert_eq!(name.parse::<HeadingStyle>(), Ok(style), "name={name:?}");
        }
    }
}