rumdl_lib/rules/
heading_utils.rs

1use lazy_static::lazy_static;
2use regex::Regex;
3use std::fmt;
4use std::str::FromStr;
5
lazy_static! {
    // Line-level patterns shared by the heading helpers in this file. Each one is compiled on
    // first use and is meant to be applied to a single line (no embedded newline).

    // ATX heading. Groups: 1 = leading whitespace, 2 = opening run of 1-6 '#', 3 = whitespace
    // after the opening run (may be empty), 4 = heading text (cannot contain '#'),
    // 5 = optional closing run of 1-6 '#' that must be preceded by whitespace.
    static ref ATX_PATTERN: Regex = Regex::new(r"^(\s*)(#{1,6})(\s*)([^#\n]*?)(?:\s+(#{1,6}))?\s*$").unwrap();
    // Setext underline made only of '='. Group 1 = leading whitespace.
    static ref SETEXT_HEADING_1: Regex = Regex::new(r"^(\s*)(=+)(\s*)$").unwrap();
    // Setext underline made only of '-'. Group 1 = leading whitespace.
    static ref SETEXT_HEADING_2: Regex = Regex::new(r"^(\s*)(-+)(\s*)$").unwrap();
    // Fence line: optional whitespace, 3+ backticks or 3+ tildes, then anything (info string).
    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,}).*$").unwrap();
    // Fence line with nothing but whitespace after the fence characters.
    static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})\s*$").unwrap();
    // Exactly "---" at the start of the line, followed only by whitespace.
    static ref FRONT_MATTER_DELIMITER: Regex = Regex::new(r"^---\s*$").unwrap();
    // Line that begins with four or more whitespace characters.
    static ref INDENTED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(\s{4,})").unwrap();
    // Anything between '<' and the next '>' (used to strip inline HTML tags).
    static ref HTML_TAG_REGEX: Regex = Regex::new(r"<[^>]*>").unwrap();

    // Single line emphasis patterns: the whole line (ignoring surrounding whitespace) is one
    // emphasis span. Group 1 = the emphasized text, which may not contain the marker character.
    static ref SINGLE_LINE_ASTERISK_EMPHASIS: Regex = Regex::new(r"^\s*\*([^*\n]+)\*\s*$").unwrap();
    static ref SINGLE_LINE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"^\s*_([^_\n]+)_\s*$").unwrap();
    static ref SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS: Regex = Regex::new(r"^\s*\*\*([^*\n]+)\*\*\s*$").unwrap();
    static ref SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"^\s*__([^_\n]+)__\s*$").unwrap();
}
23
/// Represents different styles of Markdown headings
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum HeadingStyle {
    /// ATX heading: `# Heading`
    Atx,
    /// Closed ATX heading: `# Heading #`
    AtxClosed,
    /// Setext heading whose text line is underlined with `=` characters
    Setext1,
    /// Setext heading whose text line is underlined with `-` characters
    Setext2,
    /// For maintaining consistency with the first found header style
    Consistent,
    /// Setext for h1/h2, ATX for h3-h6
    SetextWithAtx,
    /// Setext for h1/h2, ATX closed for h3-h6
    SetextWithAtxClosed,
}
37
38impl fmt::Display for HeadingStyle {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        let s = match self {
41            HeadingStyle::Atx => "atx",
42            HeadingStyle::AtxClosed => "atx_closed",
43            HeadingStyle::Setext1 => "setext1",
44            HeadingStyle::Setext2 => "setext2",
45            HeadingStyle::Consistent => "consistent",
46            HeadingStyle::SetextWithAtx => "setext_with_atx",
47            HeadingStyle::SetextWithAtxClosed => "setext_with_atx_closed",
48        };
49        write!(f, "{s}")
50    }
51}
52
53impl FromStr for HeadingStyle {
54    type Err = ();
55    fn from_str(s: &str) -> Result<Self, Self::Err> {
56        match s.to_ascii_lowercase().as_str() {
57            "atx" => Ok(HeadingStyle::Atx),
58            "atx_closed" => Ok(HeadingStyle::AtxClosed),
59            "setext1" | "setext" => Ok(HeadingStyle::Setext1),
60            "setext2" => Ok(HeadingStyle::Setext2),
61            "consistent" => Ok(HeadingStyle::Consistent),
62            "setext_with_atx" => Ok(HeadingStyle::SetextWithAtx),
63            "setext_with_atx_closed" => Ok(HeadingStyle::SetextWithAtxClosed),
64            _ => Err(()),
65        }
66    }
67}
68
/// Represents a heading in a Markdown document
#[derive(Debug, Clone, PartialEq)]
pub struct Heading {
    /// Heading text without the heading markers or the leading indentation
    pub text: String,
    /// Heading level: the number of opening `#` for ATX headings, 1 (`=`) or 2 (`-`) for Setext
    pub level: u32,
    /// Style the heading was written in
    pub style: HeadingStyle,
    /// 1-based line number of the heading's text line
    pub line_number: usize,
    /// Source text of the heading; for Setext headings this is the text line, a `\n` and the
    /// underline line
    pub original_text: String,
    /// Leading whitespace of the heading line
    pub indentation: String,
}
79
/// Utility functions for working with Markdown headings
///
/// This is a field-less namespace type: every helper is an associated function that takes the
/// text it operates on as an argument.
pub struct HeadingUtils;
82
83impl HeadingUtils {
    /// Check if a line is an ATX heading.
    ///
    /// Returns `true` when `line` matches `ATX_PATTERN`: optional leading whitespace, one to six
    /// `#` characters, the heading text and an optional closing run of `#`.
    pub fn is_atx_heading(line: &str) -> bool {
        ATX_PATTERN.is_match(line)
    }
88
89    /// Check if a line is inside a code block
90    pub fn is_in_code_block(content: &str, line_number: usize) -> bool {
91        let mut in_code_block = false;
92        let mut fence_char = None;
93        let mut line_count = 0;
94
95        for line in content.lines() {
96            line_count += 1;
97            if line_count > line_number {
98                break;
99            }
100
101            let trimmed = line.trim();
102            if trimmed.len() >= 3 {
103                let first_chars: Vec<char> = trimmed.chars().take(3).collect();
104                if first_chars.iter().all(|&c| c == '`' || c == '~') {
105                    if let Some(current_fence) = fence_char {
106                        if first_chars[0] == current_fence && first_chars.iter().all(|&c| c == current_fence) {
107                            in_code_block = false;
108                            fence_char = None;
109                        }
110                    } else {
111                        in_code_block = true;
112                        fence_char = Some(first_chars[0]);
113                    }
114                }
115            }
116        }
117
118        in_code_block
119    }
120
121    /// Parse a line into a Heading struct if it's a valid heading
122    pub fn parse_heading(content: &str, line_num: usize) -> Option<Heading> {
123        let lines: Vec<&str> = content.lines().collect();
124        if line_num == 0 || line_num > lines.len() {
125            return None;
126        }
127
128        let line = lines[line_num - 1];
129
130        // Skip if line is within a code block
131        if Self::is_in_code_block(content, line_num) {
132            return None;
133        }
134
135        // Check for ATX style headings
136        if let Some(captures) = ATX_PATTERN.captures(line) {
137            let indentation = captures.get(1).map_or("", |m| m.as_str()).to_string();
138            let opening_hashes = captures.get(2).map_or("", |m| m.as_str());
139            let level = opening_hashes.len() as u32;
140            let text = captures.get(4).map_or("", |m| m.as_str()).to_string();
141
142            let style = if let Some(closing) = captures.get(5) {
143                let closing_hashes = closing.as_str();
144                if closing_hashes.len() == opening_hashes.len() {
145                    HeadingStyle::AtxClosed
146                } else {
147                    HeadingStyle::Atx
148                }
149            } else {
150                HeadingStyle::Atx
151            };
152
153            let heading = Heading {
154                text: text.clone(),
155                level,
156                style,
157                line_number: line_num,
158                original_text: line.to_string(),
159                indentation: indentation.clone(),
160            };
161            return Some(heading);
162        }
163
164        // Check for Setext style headings
165        if line_num < lines.len() {
166            let next_line = lines[line_num];
167            let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
168
169            // Skip empty lines - don't consider them as potential Setext headings
170            if line.trim().is_empty() {
171                return None;
172            }
173
174            // Skip list items - they shouldn't be considered as potential Setext headings
175            if line.trim_start().starts_with('-')
176                || line.trim_start().starts_with('*')
177                || line.trim_start().starts_with('+')
178                || line.trim_start().starts_with("1.")
179            {
180                return None;
181            }
182
183            // Skip front matter delimiters or lines within front matter
184            if line.trim() == "---" || Self::is_in_front_matter(content, line_num - 1) {
185                return None;
186            }
187
188            if let Some(captures) = SETEXT_HEADING_1.captures(next_line) {
189                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
190                if underline_indent == line_indentation {
191                    let heading = Heading {
192                        text: line[line_indentation.len()..].to_string(),
193                        level: 1,
194                        style: HeadingStyle::Setext1,
195                        line_number: line_num,
196                        original_text: format!("{line}\n{next_line}"),
197                        indentation: line_indentation.clone(),
198                    };
199                    return Some(heading);
200                }
201            } else if let Some(captures) = SETEXT_HEADING_2.captures(next_line) {
202                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
203                if underline_indent == line_indentation {
204                    let heading = Heading {
205                        text: line[line_indentation.len()..].to_string(),
206                        level: 2,
207                        style: HeadingStyle::Setext2,
208                        line_number: line_num,
209                        original_text: format!("{line}\n{next_line}"),
210                        indentation: line_indentation.clone(),
211                    };
212                    return Some(heading);
213                }
214            }
215        }
216
217        None
218    }
219
220    /// Get the indentation level of a line
221    pub fn get_indentation(line: &str) -> usize {
222        line.len() - line.trim_start().len()
223    }
224
225    /// Convert a heading to a different style
226    pub fn convert_heading_style(text_content: &str, level: u32, style: HeadingStyle) -> String {
227        if text_content.trim().is_empty() {
228            return String::new();
229        }
230
231        // Validate heading level
232        let level = level.clamp(1, 6);
233        let indentation = text_content
234            .chars()
235            .take_while(|c| c.is_whitespace())
236            .collect::<String>();
237        let text_content = text_content.trim();
238
239        match style {
240            HeadingStyle::Atx => {
241                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
242            }
243            HeadingStyle::AtxClosed => {
244                format!(
245                    "{}{} {} {}",
246                    indentation,
247                    "#".repeat(level as usize),
248                    text_content,
249                    "#".repeat(level as usize)
250                )
251            }
252            HeadingStyle::Setext1 | HeadingStyle::Setext2 => {
253                if level > 2 {
254                    // Fall back to ATX style for levels > 2
255                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
256                } else {
257                    let underline_char = if level == 1 || style == HeadingStyle::Setext1 {
258                        '='
259                    } else {
260                        '-'
261                    };
262                    let visible_length = text_content.chars().count();
263                    let underline_length = visible_length.max(3); // Ensure at least 3 underline chars
264                    format!(
265                        "{}{}\n{}{}",
266                        indentation,
267                        text_content,
268                        indentation,
269                        underline_char.to_string().repeat(underline_length)
270                    )
271                }
272            }
273            HeadingStyle::Consistent => {
274                // For Consistent style, default to ATX as it's the most commonly used
275                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
276            }
277            HeadingStyle::SetextWithAtx => {
278                if level <= 2 {
279                    // Use Setext for h1/h2
280                    let underline_char = if level == 1 { '=' } else { '-' };
281                    let visible_length = text_content.chars().count();
282                    let underline_length = visible_length.max(3);
283                    format!(
284                        "{}{}\n{}{}",
285                        indentation,
286                        text_content,
287                        indentation,
288                        underline_char.to_string().repeat(underline_length)
289                    )
290                } else {
291                    // Use ATX for h3-h6
292                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
293                }
294            }
295            HeadingStyle::SetextWithAtxClosed => {
296                if level <= 2 {
297                    // Use Setext for h1/h2
298                    let underline_char = if level == 1 { '=' } else { '-' };
299                    let visible_length = text_content.chars().count();
300                    let underline_length = visible_length.max(3);
301                    format!(
302                        "{}{}\n{}{}",
303                        indentation,
304                        text_content,
305                        indentation,
306                        underline_char.to_string().repeat(underline_length)
307                    )
308                } else {
309                    // Use ATX closed for h3-h6
310                    format!(
311                        "{}{} {} {}",
312                        indentation,
313                        "#".repeat(level as usize),
314                        text_content,
315                        "#".repeat(level as usize)
316                    )
317                }
318            }
319        }
320    }
321
322    /// Get the text content of a heading line
323    pub fn get_heading_text(line: &str) -> Option<String> {
324        ATX_PATTERN
325            .captures(line)
326            .map(|captures| captures.get(4).map_or("", |m| m.as_str()).trim().to_string())
327    }
328
329    /// Detect emphasis-only lines
330    pub fn is_emphasis_only_line(line: &str) -> bool {
331        let trimmed = line.trim();
332        SINGLE_LINE_ASTERISK_EMPHASIS.is_match(trimmed)
333            || SINGLE_LINE_UNDERSCORE_EMPHASIS.is_match(trimmed)
334            || SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS.is_match(trimmed)
335            || SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS.is_match(trimmed)
336    }
337
338    /// Extract text from an emphasis-only line
339    pub fn extract_emphasis_text(line: &str) -> Option<(String, u32)> {
340        let trimmed = line.trim();
341
342        if let Some(caps) = SINGLE_LINE_ASTERISK_EMPHASIS.captures(trimmed) {
343            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
344        }
345
346        if let Some(caps) = SINGLE_LINE_UNDERSCORE_EMPHASIS.captures(trimmed) {
347            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
348        }
349
350        if let Some(caps) = SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS.captures(trimmed) {
351            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
352        }
353
354        if let Some(caps) = SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS.captures(trimmed) {
355            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
356        }
357
358        None
359    }
360
361    /// Convert emphasis to heading
362    pub fn convert_emphasis_to_heading(line: &str) -> Option<String> {
363        // Preserve the original indentation
364        let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
365        // Preserve trailing spaces at the end of the line
366        let trailing = if line.ends_with(" ") {
367            line.chars().rev().take_while(|c| c.is_whitespace()).collect::<String>()
368        } else {
369            String::new()
370        };
371
372        if let Some((text, level)) = Self::extract_emphasis_text(line) {
373            // Preserve the original indentation and trailing spaces
374            Some(format!(
375                "{}{} {}{}",
376                indentation,
377                "#".repeat(level as usize),
378                text,
379                trailing
380            ))
381        } else {
382            None
383        }
384    }
385
386    /// Convert a heading text to a valid ID for fragment links
387    pub fn heading_to_fragment(text: &str) -> String {
388        // Remove any HTML tags
389        let text_no_html = HTML_TAG_REGEX.replace_all(text, "");
390
391        // Convert to lowercase and trim
392        let text_lower = text_no_html.trim().to_lowercase();
393
394        // Replace spaces and punctuation with hyphens
395        let text_with_hyphens = text_lower
396            .chars()
397            .map(|c| if c.is_alphanumeric() { c } else { '-' })
398            .collect::<String>();
399
400        // Replace multiple consecutive hyphens with a single hyphen
401        let text_clean = text_with_hyphens
402            .split('-')
403            .filter(|s| !s.is_empty())
404            .collect::<Vec<_>>()
405            .join("-");
406
407        // Remove leading and trailing hyphens
408        text_clean.trim_matches('-').to_string()
409    }
410
411    /// Check if a line is in front matter
412    pub fn is_in_front_matter(content: &str, line_number: usize) -> bool {
413        let lines: Vec<&str> = content.lines().collect();
414        if lines.is_empty() || line_number >= lines.len() {
415            return false;
416        }
417
418        // Check if the document starts with front matter
419        if !lines[0].trim_start().eq("---") {
420            return false;
421        }
422
423        let mut in_front_matter = true;
424        let mut found_closing = false;
425
426        // Skip the first line (opening delimiter)
427        for (i, line) in lines.iter().enumerate().skip(1) {
428            if i > line_number {
429                break;
430            }
431
432            if line.trim_start().eq("---") {
433                found_closing = true;
434                in_front_matter = i > line_number;
435                break;
436            }
437        }
438
439        in_front_matter && !found_closing
440    }
441}
442
443/// Checks if a line is a heading
444#[inline]
445pub fn is_heading(line: &str) -> bool {
446    // Fast path checks first
447    let trimmed = line.trim();
448    if trimmed.is_empty() {
449        return false;
450    }
451
452    if trimmed.starts_with('#') {
453        // Check for ATX heading
454        ATX_PATTERN.is_match(line)
455    } else {
456        // We can't tell for setext headings without looking at the next line
457        false
458    }
459}
460
/// Checks if a line is a setext heading marker
///
/// Returns `true` when the line consists of a run of `=` or a run of `-`, optionally
/// surrounded by whitespace. Only the shape of the line is checked, so a `---` line used as a
/// thematic break or front matter delimiter also matches.
#[inline]
pub fn is_setext_heading_marker(line: &str) -> bool {
    SETEXT_HEADING_1.is_match(line) || SETEXT_HEADING_2.is_match(line)
}
466
467/// Checks if a line is a setext heading by examining its next line
468#[inline]
469pub fn is_setext_heading(lines: &[&str], index: usize) -> bool {
470    if index >= lines.len() - 1 {
471        return false;
472    }
473
474    let current_line = lines[index];
475    let next_line = lines[index + 1];
476
477    // Skip if current line is empty
478    if current_line.trim().is_empty() {
479        return false;
480    }
481
482    // Check if next line is a setext heading marker with same indentation
483    let current_indentation = current_line
484        .chars()
485        .take_while(|c| c.is_whitespace())
486        .collect::<String>();
487
488    if let Some(captures) = SETEXT_HEADING_1.captures(next_line) {
489        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
490        return underline_indent == current_indentation;
491    }
492
493    if let Some(captures) = SETEXT_HEADING_2.captures(next_line) {
494        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
495        return underline_indent == current_indentation;
496    }
497
498    false
499}
500
501/// Get the heading level for a line
502#[inline]
503pub fn get_heading_level(lines: &[&str], index: usize) -> u32 {
504    if index >= lines.len() {
505        return 0;
506    }
507
508    let line = lines[index];
509
510    // Check for ATX style heading
511    if let Some(captures) = ATX_PATTERN.captures(line) {
512        let hashes = captures.get(2).map_or("", |m| m.as_str());
513        return hashes.len() as u32;
514    }
515
516    // Check for setext style heading
517    if index < lines.len() - 1 {
518        let next_line = lines[index + 1];
519
520        if SETEXT_HEADING_1.is_match(next_line) {
521            return 1;
522        }
523
524        if SETEXT_HEADING_2.is_match(next_line) {
525            return 2;
526        }
527    }
528
529    0
530}
531
532/// Extract the text content from a heading
533#[inline]
534pub fn extract_heading_text(lines: &[&str], index: usize) -> String {
535    if index >= lines.len() {
536        return String::new();
537    }
538
539    let line = lines[index];
540
541    // Extract from ATX heading
542    if let Some(captures) = ATX_PATTERN.captures(line) {
543        return captures.get(4).map_or("", |m| m.as_str()).trim().to_string();
544    }
545
546    // Extract from setext heading
547    if index < lines.len() - 1 {
548        let next_line = lines[index + 1];
549        let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
550
551        if let Some(captures) = SETEXT_HEADING_1.captures(next_line) {
552            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
553            if underline_indent == line_indentation {
554                return line[line_indentation.len()..].trim().to_string();
555            }
556        }
557
558        if let Some(captures) = SETEXT_HEADING_2.captures(next_line) {
559            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
560            if underline_indent == line_indentation {
561                return line[line_indentation.len()..].trim().to_string();
562            }
563        }
564    }
565
566    line.trim().to_string()
567}
568
/// Get the indentation of a heading
///
/// Returns the number of bytes of leading whitespace of `lines[index]`, or `0` when `index`
/// is out of range.
#[inline]
pub fn get_heading_indentation(lines: &[&str], index: usize) -> usize {
    lines
        .get(index)
        .map_or(0, |line| line.len() - line.trim_start().len())
}
579
/// Check if a line is a code block delimiter
///
/// Returns `true` when the line, after optional leading whitespace, starts with at least
/// three backticks or at least three tildes. Any text after the fence characters (such as an
/// info string) is accepted.
#[inline]
pub fn is_code_block_delimiter(line: &str) -> bool {
    FENCED_CODE_BLOCK_START.is_match(line) || FENCED_CODE_BLOCK_END.is_match(line)
}
585
/// Check if a line is a front matter delimiter
///
/// Returns `true` when the line is exactly `---` at the start of the line, optionally
/// followed by whitespace. Leading whitespace is not accepted.
#[inline]
pub fn is_front_matter_delimiter(line: &str) -> bool {
    FRONT_MATTER_DELIMITER.is_match(line)
}
591
/// Remove trailing hashes from a heading
///
/// Strips trailing whitespace, then the run of `#` characters at the end of the text (if
/// any), then the whitespace that preceded that run. Text that does not end in `#` is only
/// trimmed at the end.
///
/// Works on string slices rather than character positions, so non-ASCII text is handled
/// correctly.
#[inline]
pub fn remove_trailing_hashes(text: &str) -> String {
    text.trim_end().trim_end_matches('#').trim_end().to_string()
}
622
623/// Normalize a heading to the specified level
624#[inline]
625pub fn normalize_heading(line: &str, level: u32) -> String {
626    let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
627    let trimmed = line.trim_start();
628
629    if trimmed.starts_with('#') {
630        if let Some(text) = HeadingUtils::get_heading_text(line) {
631            format!("{}{} {}", indentation, "#".repeat(level as usize), text)
632        } else {
633            line.to_string()
634        }
635    } else {
636        format!("{}{} {}", indentation, "#".repeat(level as usize), trimmed)
637    }
638}
639
640#[cfg(test)]
641mod tests {
642    use super::*;
643
644    #[test]
645    fn test_atx_heading_parsing() {
646        let content = "# Heading 1\n## Heading 2\n### Heading 3";
647        assert!(HeadingUtils::parse_heading(content, 1).is_some());
648        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
649        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().level, 2);
650        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 3);
651    }
652
653    #[test]
654    fn test_setext_heading_parsing() {
655        let content = "Heading 1\n=========\nHeading 2\n---------";
656        assert!(HeadingUtils::parse_heading(content, 1).is_some());
657        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
658        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 2);
659    }
660
661    #[test]
662    fn test_heading_style_conversion() {
663        assert_eq!(
664            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Atx),
665            "# Heading 1"
666        );
667        assert_eq!(
668            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::AtxClosed),
669            "## Heading 2 ##"
670        );
671        assert_eq!(
672            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Setext1),
673            "Heading 1\n========="
674        );
675        assert_eq!(
676            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::Setext2),
677            "Heading 2\n---------"
678        );
679    }
680
681    #[test]
682    fn test_code_block_detection() {
683        let content = "# Heading\n```\n# Not a heading\n```\n# Another heading";
684        assert!(!HeadingUtils::is_in_code_block(content, 0));
685        assert!(HeadingUtils::is_in_code_block(content, 2));
686        assert!(!HeadingUtils::is_in_code_block(content, 4));
687    }
688
689    #[test]
690    fn test_empty_line_with_dashes() {
691        // Test that an empty line followed by dashes is not considered a heading
692        let content = "\n---";
693
694        // Empty line is at index 0, dashes at index 1
695        assert_eq!(
696            HeadingUtils::parse_heading(content, 1),
697            None,
698            "Empty line followed by dashes should not be detected as a heading"
699        );
700
701        // Also test with a regular horizontal rule
702        let content2 = "Some content\n\n---\nMore content";
703        assert_eq!(
704            HeadingUtils::parse_heading(content2, 2),
705            None,
706            "Empty line followed by horizontal rule should not be detected as a heading"
707        );
708    }
709
710    #[test]
711    fn test_is_atx_heading() {
712        assert!(HeadingUtils::is_atx_heading("# Heading"));
713        assert!(HeadingUtils::is_atx_heading("## Heading"));
714        assert!(HeadingUtils::is_atx_heading("### Heading"));
715        assert!(HeadingUtils::is_atx_heading("#### Heading"));
716        assert!(HeadingUtils::is_atx_heading("##### Heading"));
717        assert!(HeadingUtils::is_atx_heading("###### Heading"));
718        assert!(HeadingUtils::is_atx_heading("  # Indented"));
719        assert!(HeadingUtils::is_atx_heading("# Heading #"));
720        assert!(HeadingUtils::is_atx_heading("## Heading ###"));
721
722        assert!(!HeadingUtils::is_atx_heading("####### Too many"));
723        assert!(!HeadingUtils::is_atx_heading("Not a heading"));
724        assert!(HeadingUtils::is_atx_heading("#")); // Single # is a valid heading
725        assert!(!HeadingUtils::is_atx_heading(""));
726    }
727
728    #[test]
729    fn test_heading_edge_cases() {
730        // Test invalid line numbers
731        let content = "# Heading";
732        assert!(HeadingUtils::parse_heading(content, 0).is_none());
733        assert!(HeadingUtils::parse_heading(content, 10).is_none());
734
735        // Test headings in code blocks
736        let content = "```\n# Not a heading\n```";
737        assert!(HeadingUtils::parse_heading(content, 2).is_none());
738
739        // Test with tildes for code blocks
740        let content = "~~~\n# Not a heading\n~~~";
741        assert!(HeadingUtils::is_in_code_block(content, 2));
742
743        // Test mixed fence characters
744        let content = "```\n# Content\n~~~"; // Mismatched fences
745        assert!(HeadingUtils::is_in_code_block(content, 2));
746    }
747
748    #[test]
749    fn test_atx_closed_heading_variations() {
750        let content = "# Heading #\n## Heading ##\n### Heading ####\n#### Heading ##";
751        let h1 = HeadingUtils::parse_heading(content, 1).unwrap();
752        assert_eq!(h1.style, HeadingStyle::AtxClosed);
753        assert_eq!(h1.text, "Heading");
754
755        let h2 = HeadingUtils::parse_heading(content, 2).unwrap();
756        assert_eq!(h2.style, HeadingStyle::AtxClosed);
757
758        // Mismatched closing hashes - still ATX but not closed
759        let h3 = HeadingUtils::parse_heading(content, 3).unwrap();
760        assert_eq!(h3.style, HeadingStyle::Atx);
761
762        let h4 = HeadingUtils::parse_heading(content, 4).unwrap();
763        assert_eq!(h4.style, HeadingStyle::Atx);
764    }
765
766    #[test]
767    fn test_setext_heading_edge_cases() {
768        // List item followed by dashes should not be a heading
769        let content = "- List item\n---------";
770        assert!(HeadingUtils::parse_heading(content, 1).is_none());
771
772        // Front matter should not be a heading
773        let content = "---\ntitle: test\n---";
774        assert!(HeadingUtils::parse_heading(content, 1).is_none());
775
776        // Indented setext headings
777        let content = "  Indented\n  ========";
778        let heading = HeadingUtils::parse_heading(content, 1).unwrap();
779        assert_eq!(heading.indentation, "  ");
780        assert_eq!(heading.text, "Indented");
781
782        // Mismatched indentation should not be a heading
783        let content = "  Text\n========"; // No indent on underline
784        assert!(HeadingUtils::parse_heading(content, 1).is_none());
785    }
786
787    #[test]
788    fn test_get_indentation() {
789        assert_eq!(HeadingUtils::get_indentation("# Heading"), 0);
790        assert_eq!(HeadingUtils::get_indentation("  # Heading"), 2);
791        assert_eq!(HeadingUtils::get_indentation("    # Heading"), 4);
792        assert_eq!(HeadingUtils::get_indentation("\t# Heading"), 1);
793        assert_eq!(HeadingUtils::get_indentation(""), 0);
794    }
795
796    #[test]
797    fn test_convert_heading_style_edge_cases() {
798        // Empty text
799        assert_eq!(HeadingUtils::convert_heading_style("", 1, HeadingStyle::Atx), "");
800        assert_eq!(HeadingUtils::convert_heading_style("   ", 1, HeadingStyle::Atx), "");
801
802        // Level clamping
803        assert_eq!(
804            HeadingUtils::convert_heading_style("Text", 0, HeadingStyle::Atx),
805            "# Text"
806        );
807        assert_eq!(
808            HeadingUtils::convert_heading_style("Text", 10, HeadingStyle::Atx),
809            "###### Text"
810        );
811
812        // Setext with level > 2 falls back to ATX
813        assert_eq!(
814            HeadingUtils::convert_heading_style("Text", 3, HeadingStyle::Setext1),
815            "### Text"
816        );
817
818        // Preserve indentation
819        assert_eq!(
820            HeadingUtils::convert_heading_style("  Text", 1, HeadingStyle::Atx),
821            "  # Text"
822        );
823
824        // Very short text for setext
825        assert_eq!(
826            HeadingUtils::convert_heading_style("Hi", 1, HeadingStyle::Setext1),
827            "Hi\n==="
828        );
829    }
830
831    #[test]
832    fn test_get_heading_text() {
833        assert_eq!(HeadingUtils::get_heading_text("# Heading"), Some("Heading".to_string()));
834        assert_eq!(
835            HeadingUtils::get_heading_text("## Heading ##"),
836            Some("Heading".to_string())
837        );
838        assert_eq!(
839            HeadingUtils::get_heading_text("###   Spaces   "),
840            Some("Spaces".to_string())
841        );
842        assert_eq!(HeadingUtils::get_heading_text("Not a heading"), None);
843        assert_eq!(HeadingUtils::get_heading_text(""), None);
844    }
845
846    #[test]
847    fn test_emphasis_detection() {
848        assert!(HeadingUtils::is_emphasis_only_line("*emphasis*"));
849        assert!(HeadingUtils::is_emphasis_only_line("_emphasis_"));
850        assert!(HeadingUtils::is_emphasis_only_line("**strong**"));
851        assert!(HeadingUtils::is_emphasis_only_line("__strong__"));
852        assert!(HeadingUtils::is_emphasis_only_line("  *emphasis*  "));
853
854        assert!(!HeadingUtils::is_emphasis_only_line("*not* emphasis"));
855        assert!(!HeadingUtils::is_emphasis_only_line("text *emphasis*"));
856        assert!(!HeadingUtils::is_emphasis_only_line("**"));
857        assert!(!HeadingUtils::is_emphasis_only_line(""));
858    }
859
860    #[test]
861    fn test_extract_emphasis_text() {
862        assert_eq!(
863            HeadingUtils::extract_emphasis_text("*text*"),
864            Some(("text".to_string(), 1))
865        );
866        assert_eq!(
867            HeadingUtils::extract_emphasis_text("_text_"),
868            Some(("text".to_string(), 1))
869        );
870        assert_eq!(
871            HeadingUtils::extract_emphasis_text("**text**"),
872            Some(("text".to_string(), 2))
873        );
874        assert_eq!(
875            HeadingUtils::extract_emphasis_text("__text__"),
876            Some(("text".to_string(), 2))
877        );
878        assert_eq!(
879            HeadingUtils::extract_emphasis_text("  *spaced*  "),
880            Some(("spaced".to_string(), 1))
881        );
882
883        assert_eq!(HeadingUtils::extract_emphasis_text("not emphasis"), None);
884        assert_eq!(HeadingUtils::extract_emphasis_text("*not* complete"), None);
885    }
886
887    #[test]
888    fn test_convert_emphasis_to_heading() {
889        assert_eq!(
890            HeadingUtils::convert_emphasis_to_heading("*text*"),
891            Some("# text".to_string())
892        );
893        assert_eq!(
894            HeadingUtils::convert_emphasis_to_heading("**text**"),
895            Some("## text".to_string())
896        );
897        assert_eq!(
898            HeadingUtils::convert_emphasis_to_heading("  *text*"),
899            Some("  # text".to_string())
900        );
901        assert_eq!(
902            HeadingUtils::convert_emphasis_to_heading("*text* "),
903            Some("# text ".to_string())
904        );
905
906        assert_eq!(HeadingUtils::convert_emphasis_to_heading("not emphasis"), None);
907    }
908
909    #[test]
910    fn test_heading_to_fragment() {
911        assert_eq!(HeadingUtils::heading_to_fragment("Simple Heading"), "simple-heading");
912        assert_eq!(
913            HeadingUtils::heading_to_fragment("Heading with Numbers 123"),
914            "heading-with-numbers-123"
915        );
916        assert_eq!(
917            HeadingUtils::heading_to_fragment("Special!@#$%Characters"),
918            "special-characters"
919        );
920        assert_eq!(HeadingUtils::heading_to_fragment("  Trimmed  "), "trimmed");
921        assert_eq!(
922            HeadingUtils::heading_to_fragment("Multiple   Spaces"),
923            "multiple-spaces"
924        );
925        assert_eq!(
926            HeadingUtils::heading_to_fragment("Heading <em>with HTML</em>"),
927            "heading-with-html"
928        );
929        assert_eq!(
930            HeadingUtils::heading_to_fragment("---Leading-Dashes---"),
931            "leading-dashes"
932        );
933        assert_eq!(HeadingUtils::heading_to_fragment(""), "");
934    }
935
936    #[test]
937    fn test_is_in_front_matter() {
938        let content = "---\ntitle: Test\n---\n# Content";
939        assert!(HeadingUtils::is_in_front_matter(content, 1));
940        assert!(!HeadingUtils::is_in_front_matter(content, 2)); // Closing delimiter is not considered in front matter
941        assert!(!HeadingUtils::is_in_front_matter(content, 3));
942        assert!(!HeadingUtils::is_in_front_matter(content, 4));
943
944        // No front matter
945        let content = "# Just content";
946        assert!(!HeadingUtils::is_in_front_matter(content, 0));
947
948        // Unclosed front matter
949        let content = "---\ntitle: Test\n# No closing";
950        assert!(HeadingUtils::is_in_front_matter(content, 1));
951        assert!(HeadingUtils::is_in_front_matter(content, 2)); // Still in unclosed front matter
952
953        // Front matter not at start
954        let content = "# Heading\n---\ntitle: Test\n---";
955        assert!(!HeadingUtils::is_in_front_matter(content, 2));
956    }
957
958    #[test]
959    fn test_module_level_functions() {
960        // Test is_heading
961        assert!(is_heading("# Heading"));
962        assert!(is_heading("  ## Indented"));
963        assert!(!is_heading("Not a heading"));
964        assert!(!is_heading(""));
965
966        // Test is_setext_heading_marker
967        assert!(is_setext_heading_marker("========"));
968        assert!(is_setext_heading_marker("--------"));
969        assert!(is_setext_heading_marker("  ======"));
970        assert!(!is_setext_heading_marker("# Heading"));
971        assert!(is_setext_heading_marker("---")); // Three dashes is valid
972
973        // Test is_setext_heading
974        let lines = vec!["Title", "====="];
975        assert!(is_setext_heading(&lines, 0));
976
977        let lines = vec!["", "====="];
978        assert!(!is_setext_heading(&lines, 0));
979
980        // Test get_heading_level
981        let lines = vec!["# H1", "## H2", "### H3"];
982        assert_eq!(get_heading_level(&lines, 0), 1);
983        assert_eq!(get_heading_level(&lines, 1), 2);
984        assert_eq!(get_heading_level(&lines, 2), 3);
985        assert_eq!(get_heading_level(&lines, 10), 0);
986
987        // Test extract_heading_text
988        let lines = vec!["# Heading Text", "## Another ###"];
989        assert_eq!(extract_heading_text(&lines, 0), "Heading Text");
990        assert_eq!(extract_heading_text(&lines, 1), "Another");
991
992        // Test get_heading_indentation
993        let lines = vec!["# No indent", "  ## Two spaces", "    ### Four spaces"];
994        assert_eq!(get_heading_indentation(&lines, 0), 0);
995        assert_eq!(get_heading_indentation(&lines, 1), 2);
996        assert_eq!(get_heading_indentation(&lines, 2), 4);
997    }
998
999    #[test]
1000    fn test_is_code_block_delimiter() {
1001        assert!(is_code_block_delimiter("```"));
1002        assert!(is_code_block_delimiter("~~~"));
1003        assert!(is_code_block_delimiter("````"));
1004        assert!(is_code_block_delimiter("```rust"));
1005        assert!(is_code_block_delimiter("  ```"));
1006
1007        assert!(!is_code_block_delimiter("``")); // Too short
1008        assert!(!is_code_block_delimiter("# Heading"));
1009    }
1010
1011    #[test]
1012    fn test_is_front_matter_delimiter() {
1013        assert!(is_front_matter_delimiter("---"));
1014        assert!(is_front_matter_delimiter("---  "));
1015
1016        assert!(!is_front_matter_delimiter("----"));
1017        assert!(!is_front_matter_delimiter("--"));
1018        assert!(!is_front_matter_delimiter("# ---"));
1019    }
1020
1021    #[test]
1022    fn test_remove_trailing_hashes() {
1023        assert_eq!(remove_trailing_hashes("Heading ###"), "Heading");
1024        assert_eq!(remove_trailing_hashes("Heading ## "), "Heading");
1025        assert_eq!(remove_trailing_hashes("Heading #not trailing"), "Heading #not trailing");
1026        assert_eq!(remove_trailing_hashes("No hashes"), "No hashes");
1027        assert_eq!(remove_trailing_hashes(""), "");
1028
1029        // Test the specific case that was failing
1030        assert_eq!(remove_trailing_hashes("Heading ##"), "Heading");
1031        assert_eq!(remove_trailing_hashes("Heading #"), "Heading");
1032        assert_eq!(remove_trailing_hashes("Heading ####"), "Heading");
1033
1034        // Edge cases
1035        assert_eq!(remove_trailing_hashes("#"), "");
1036        assert_eq!(remove_trailing_hashes("##"), "");
1037        assert_eq!(remove_trailing_hashes("###"), "");
1038        assert_eq!(remove_trailing_hashes("Text#"), "Text");
1039        assert_eq!(remove_trailing_hashes("Text ##"), "Text");
1040    }
1041
1042    #[test]
1043    fn test_normalize_heading() {
1044        assert_eq!(normalize_heading("# Old Level", 3), "### Old Level");
1045        assert_eq!(normalize_heading("## Heading ##", 1), "# Heading");
1046        assert_eq!(normalize_heading("  # Indented", 2), "  ## Indented");
1047        assert_eq!(normalize_heading("Plain text", 1), "# Plain text");
1048    }
1049
1050    #[test]
1051    fn test_heading_style_from_str() {
1052        assert_eq!(HeadingStyle::from_str("atx"), Ok(HeadingStyle::Atx));
1053        assert_eq!(HeadingStyle::from_str("ATX"), Ok(HeadingStyle::Atx));
1054        assert_eq!(HeadingStyle::from_str("atx_closed"), Ok(HeadingStyle::AtxClosed));
1055        assert_eq!(HeadingStyle::from_str("setext1"), Ok(HeadingStyle::Setext1));
1056        assert_eq!(HeadingStyle::from_str("setext"), Ok(HeadingStyle::Setext1));
1057        assert_eq!(HeadingStyle::from_str("setext2"), Ok(HeadingStyle::Setext2));
1058        assert_eq!(HeadingStyle::from_str("consistent"), Ok(HeadingStyle::Consistent));
1059        assert_eq!(HeadingStyle::from_str("invalid"), Err(()));
1060    }
1061
1062    #[test]
1063    fn test_heading_style_display() {
1064        assert_eq!(HeadingStyle::Atx.to_string(), "atx");
1065        assert_eq!(HeadingStyle::AtxClosed.to_string(), "atx_closed");
1066        assert_eq!(HeadingStyle::Setext1.to_string(), "setext1");
1067        assert_eq!(HeadingStyle::Setext2.to_string(), "setext2");
1068        assert_eq!(HeadingStyle::Consistent.to_string(), "consistent");
1069    }
1070
1071    #[test]
1072    fn test_unicode_headings() {
1073        let content = "# 你好世界\n## Ñoño\n### 🚀 Emoji";
1074        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().text, "你好世界");
1075        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().text, "Ñoño");
1076        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().text, "🚀 Emoji");
1077
1078        // Test fragment generation with unicode
1079        assert_eq!(HeadingUtils::heading_to_fragment("你好世界"), "你好世界");
1080        assert_eq!(HeadingUtils::heading_to_fragment("Café René"), "café-rené");
1081    }
1082
1083    #[test]
1084    fn test_complex_nested_structures() {
1085        // Code block inside front matter (edge case)
1086        // The function doesn't handle YAML multi-line strings, so ``` inside front matter
1087        // is treated as a code block start
1088        let content = "---\ncode: |\n  ```\n  # Not a heading\n  ```\n---\n# Real heading";
1089        assert!(HeadingUtils::is_in_code_block(content, 4)); // Inside code block
1090        assert!(HeadingUtils::parse_heading(content, 7).is_some());
1091
1092        // Multiple code blocks
1093        let content = "```\ncode\n```\n# Heading\n~~~\nmore code\n~~~";
1094        assert!(!HeadingUtils::is_in_code_block(content, 4));
1095        assert!(HeadingUtils::parse_heading(content, 4).is_some());
1096    }
1097}