rumdl_lib/rules/
heading_utils.rs

1use lazy_static::lazy_static;
2use regex::Regex;
3use std::fmt;
4use std::str::FromStr;
5
6lazy_static! {
7    // Optimized regex patterns with more efficient non-capturing groups
8    static ref ATX_PATTERN: Regex = Regex::new(r"^(\s*)(#{1,6})(\s*)([^#\n]*?)(?:\s+(#{1,6}))?\s*$").unwrap();
9    static ref SETEXT_HEADING_1: Regex = Regex::new(r"^(\s*)(=+)(\s*)$").unwrap();
10    static ref SETEXT_HEADING_2: Regex = Regex::new(r"^(\s*)(-+)(\s*)$").unwrap();
11    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,}).*$").unwrap();
12    static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})\s*$").unwrap();
13    static ref FRONT_MATTER_DELIMITER: Regex = Regex::new(r"^---\s*$").unwrap();
14    static ref INDENTED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(\s{4,})").unwrap();
15    static ref HTML_TAG_REGEX: Regex = Regex::new(r"<[^>]*>").unwrap();
16
17    // Single line emphasis patterns
18    static ref SINGLE_LINE_ASTERISK_EMPHASIS: Regex = Regex::new(r"^\s*\*([^*\n]+)\*\s*$").unwrap();
19    static ref SINGLE_LINE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"^\s*_([^_\n]+)_\s*$").unwrap();
20    static ref SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS: Regex = Regex::new(r"^\s*\*\*([^*\n]+)\*\*\s*$").unwrap();
21    static ref SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"^\s*__([^_\n]+)__\s*$").unwrap();
22}
23
/// The heading styles this module can detect or generate.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading: `# Heading`.
    Atx,
    /// Closed ATX heading: `# Heading #`.
    AtxClosed,
    /// Setext heading underlined with `=======`.
    Setext1,
    /// Setext heading underlined with `-------`.
    Setext2,
    /// Maintain consistency with the first heading style found.
    Consistent,
    /// Setext for h1/h2, ATX for h3-h6.
    SetextWithAtx,
    /// Setext for h1/h2, closed ATX for h3-h6.
    SetextWithAtxClosed,
}
37
38impl fmt::Display for HeadingStyle {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        let s = match self {
41            HeadingStyle::Atx => "atx",
42            HeadingStyle::AtxClosed => "atx_closed",
43            HeadingStyle::Setext1 => "setext1",
44            HeadingStyle::Setext2 => "setext2",
45            HeadingStyle::Consistent => "consistent",
46            HeadingStyle::SetextWithAtx => "setext_with_atx",
47            HeadingStyle::SetextWithAtxClosed => "setext_with_atx_closed",
48        };
49        write!(f, "{s}")
50    }
51}
52
53impl FromStr for HeadingStyle {
54    type Err = ();
55    fn from_str(s: &str) -> Result<Self, Self::Err> {
56        match s.to_ascii_lowercase().as_str() {
57            "atx" => Ok(HeadingStyle::Atx),
58            "atx_closed" => Ok(HeadingStyle::AtxClosed),
59            "setext1" | "setext" => Ok(HeadingStyle::Setext1),
60            "setext2" => Ok(HeadingStyle::Setext2),
61            "consistent" => Ok(HeadingStyle::Consistent),
62            "setext_with_atx" => Ok(HeadingStyle::SetextWithAtx),
63            "setext_with_atx_closed" => Ok(HeadingStyle::SetextWithAtxClosed),
64            _ => Err(()),
65        }
66    }
67}
68
69/// Represents a heading in a Markdown document
70#[derive(Debug, Clone, PartialEq)]
71pub struct Heading {
72    pub text: String,
73    pub level: u32,
74    pub style: HeadingStyle,
75    pub line_number: usize,
76    pub original_text: String,
77    pub indentation: String,
78}
79
/// Utility functions for working with Markdown headings.
///
/// This is a unit struct: it holds no data and only serves as a namespace for the
/// associated functions defined in its `impl` block.
pub struct HeadingUtils;
82
83impl HeadingUtils {
84    /// Check if a line is an ATX heading (starts with #)
85    pub fn is_atx_heading(line: &str) -> bool {
86        ATX_PATTERN.is_match(line)
87    }
88
89    /// Check if a line is inside a code block
90    pub fn is_in_code_block(content: &str, line_number: usize) -> bool {
91        let mut in_code_block = false;
92        let mut fence_char = None;
93        let mut line_count = 0;
94
95        for line in content.lines() {
96            line_count += 1;
97            if line_count > line_number {
98                break;
99            }
100
101            let trimmed = line.trim();
102            if trimmed.len() >= 3 {
103                let first_chars: Vec<char> = trimmed.chars().take(3).collect();
104                if first_chars.iter().all(|&c| c == '`' || c == '~') {
105                    if let Some(current_fence) = fence_char {
106                        if first_chars[0] == current_fence && first_chars.iter().all(|&c| c == current_fence) {
107                            in_code_block = false;
108                            fence_char = None;
109                        }
110                    } else {
111                        in_code_block = true;
112                        fence_char = Some(first_chars[0]);
113                    }
114                }
115            }
116        }
117
118        in_code_block
119    }
120
121    /// Parse a line into a Heading struct if it's a valid heading
122    pub fn parse_heading(content: &str, line_num: usize) -> Option<Heading> {
123        let lines: Vec<&str> = content.lines().collect();
124        if line_num == 0 || line_num > lines.len() {
125            return None;
126        }
127
128        let line = lines[line_num - 1];
129
130        // Skip if line is within a code block
131        if Self::is_in_code_block(content, line_num) {
132            return None;
133        }
134
135        // Check for ATX style headings
136        if let Some(captures) = ATX_PATTERN.captures(line) {
137            let indentation = captures.get(1).map_or("", |m| m.as_str()).to_string();
138            let opening_hashes = captures.get(2).map_or("", |m| m.as_str());
139            let level = opening_hashes.len() as u32;
140            let text = captures.get(4).map_or("", |m| m.as_str()).to_string();
141
142            let style = if let Some(closing) = captures.get(5) {
143                let closing_hashes = closing.as_str();
144                if closing_hashes.len() == opening_hashes.len() {
145                    HeadingStyle::AtxClosed
146                } else {
147                    HeadingStyle::Atx
148                }
149            } else {
150                HeadingStyle::Atx
151            };
152
153            let heading = Heading {
154                text: text.clone(),
155                level,
156                style,
157                line_number: line_num,
158                original_text: line.to_string(),
159                indentation: indentation.clone(),
160            };
161            return Some(heading);
162        }
163
164        // Check for Setext style headings
165        if line_num < lines.len() {
166            let next_line = lines[line_num];
167            let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
168
169            // Skip empty lines - don't consider them as potential Setext headings
170            if line.trim().is_empty() {
171                return None;
172            }
173
174            // Skip list items - they shouldn't be considered as potential Setext headings
175            if line.trim_start().starts_with('-')
176                || line.trim_start().starts_with('*')
177                || line.trim_start().starts_with('+')
178                || line.trim_start().starts_with("1.")
179            {
180                return None;
181            }
182
183            // Skip front matter delimiters or lines within front matter
184            if line.trim() == "---" || Self::is_in_front_matter(content, line_num - 1) {
185                return None;
186            }
187
188            if let Some(captures) = SETEXT_HEADING_1.captures(next_line) {
189                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
190                if underline_indent == line_indentation {
191                    let heading = Heading {
192                        text: line[line_indentation.len()..].to_string(),
193                        level: 1,
194                        style: HeadingStyle::Setext1,
195                        line_number: line_num,
196                        original_text: format!("{line}\n{next_line}"),
197                        indentation: line_indentation.clone(),
198                    };
199                    return Some(heading);
200                }
201            } else if let Some(captures) = SETEXT_HEADING_2.captures(next_line) {
202                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
203                if underline_indent == line_indentation {
204                    let heading = Heading {
205                        text: line[line_indentation.len()..].to_string(),
206                        level: 2,
207                        style: HeadingStyle::Setext2,
208                        line_number: line_num,
209                        original_text: format!("{line}\n{next_line}"),
210                        indentation: line_indentation.clone(),
211                    };
212                    return Some(heading);
213                }
214            }
215        }
216
217        None
218    }
219
220    /// Get the indentation level of a line
221    pub fn get_indentation(line: &str) -> usize {
222        line.len() - line.trim_start().len()
223    }
224
225    /// Convert a heading to a different style
226    pub fn convert_heading_style(text_content: &str, level: u32, style: HeadingStyle) -> String {
227        if text_content.trim().is_empty() {
228            return String::new();
229        }
230
231        // Validate heading level
232        let level = level.clamp(1, 6);
233        let indentation = text_content
234            .chars()
235            .take_while(|c| c.is_whitespace())
236            .collect::<String>();
237        let text_content = text_content.trim();
238
239        match style {
240            HeadingStyle::Atx => {
241                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
242            }
243            HeadingStyle::AtxClosed => {
244                format!(
245                    "{}{} {} {}",
246                    indentation,
247                    "#".repeat(level as usize),
248                    text_content,
249                    "#".repeat(level as usize)
250                )
251            }
252            HeadingStyle::Setext1 | HeadingStyle::Setext2 => {
253                if level > 2 {
254                    // Fall back to ATX style for levels > 2
255                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
256                } else {
257                    let underline_char = if level == 1 || style == HeadingStyle::Setext1 {
258                        '='
259                    } else {
260                        '-'
261                    };
262                    let visible_length = text_content.chars().count();
263                    let underline_length = visible_length.max(1); // Ensure at least 1 underline char
264                    format!(
265                        "{}{}\n{}{}",
266                        indentation,
267                        text_content,
268                        indentation,
269                        underline_char.to_string().repeat(underline_length)
270                    )
271                }
272            }
273            HeadingStyle::Consistent => {
274                // For Consistent style, default to ATX as it's the most commonly used
275                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
276            }
277            HeadingStyle::SetextWithAtx => {
278                if level <= 2 {
279                    // Use Setext for h1/h2
280                    let underline_char = if level == 1 { '=' } else { '-' };
281                    let visible_length = text_content.chars().count();
282                    let underline_length = visible_length.max(1);
283                    format!(
284                        "{}{}\n{}{}",
285                        indentation,
286                        text_content,
287                        indentation,
288                        underline_char.to_string().repeat(underline_length)
289                    )
290                } else {
291                    // Use ATX for h3-h6
292                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
293                }
294            }
295            HeadingStyle::SetextWithAtxClosed => {
296                if level <= 2 {
297                    // Use Setext for h1/h2
298                    let underline_char = if level == 1 { '=' } else { '-' };
299                    let visible_length = text_content.chars().count();
300                    let underline_length = visible_length.max(1);
301                    format!(
302                        "{}{}\n{}{}",
303                        indentation,
304                        text_content,
305                        indentation,
306                        underline_char.to_string().repeat(underline_length)
307                    )
308                } else {
309                    // Use ATX closed for h3-h6
310                    format!(
311                        "{}{} {} {}",
312                        indentation,
313                        "#".repeat(level as usize),
314                        text_content,
315                        "#".repeat(level as usize)
316                    )
317                }
318            }
319        }
320    }
321
322    /// Get the text content of a heading line
323    pub fn get_heading_text(line: &str) -> Option<String> {
324        ATX_PATTERN
325            .captures(line)
326            .map(|captures| captures.get(4).map_or("", |m| m.as_str()).trim().to_string())
327    }
328
329    /// Detect emphasis-only lines
330    pub fn is_emphasis_only_line(line: &str) -> bool {
331        let trimmed = line.trim();
332        SINGLE_LINE_ASTERISK_EMPHASIS.is_match(trimmed)
333            || SINGLE_LINE_UNDERSCORE_EMPHASIS.is_match(trimmed)
334            || SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS.is_match(trimmed)
335            || SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS.is_match(trimmed)
336    }
337
338    /// Extract text from an emphasis-only line
339    pub fn extract_emphasis_text(line: &str) -> Option<(String, u32)> {
340        let trimmed = line.trim();
341
342        if let Some(caps) = SINGLE_LINE_ASTERISK_EMPHASIS.captures(trimmed) {
343            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
344        }
345
346        if let Some(caps) = SINGLE_LINE_UNDERSCORE_EMPHASIS.captures(trimmed) {
347            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
348        }
349
350        if let Some(caps) = SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS.captures(trimmed) {
351            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
352        }
353
354        if let Some(caps) = SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS.captures(trimmed) {
355            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
356        }
357
358        None
359    }
360
361    /// Convert emphasis to heading
362    pub fn convert_emphasis_to_heading(line: &str) -> Option<String> {
363        // Preserve the original indentation
364        let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
365        // Preserve trailing spaces at the end of the line
366        let trailing = if line.ends_with(" ") {
367            line.chars().rev().take_while(|c| c.is_whitespace()).collect::<String>()
368        } else {
369            String::new()
370        };
371
372        if let Some((text, level)) = Self::extract_emphasis_text(line) {
373            // Preserve the original indentation and trailing spaces
374            Some(format!(
375                "{}{} {}{}",
376                indentation,
377                "#".repeat(level as usize),
378                text,
379                trailing
380            ))
381        } else {
382            None
383        }
384    }
385
386    /// Convert a heading text to a valid ID for fragment links
387    pub fn heading_to_fragment(text: &str) -> String {
388        // Remove any HTML tags
389        let text_no_html = HTML_TAG_REGEX.replace_all(text, "");
390
391        // Convert to lowercase and trim
392        let text_lower = text_no_html.trim().to_lowercase();
393
394        // Replace spaces and punctuation with hyphens
395        let text_with_hyphens = text_lower
396            .chars()
397            .map(|c| if c.is_alphanumeric() { c } else { '-' })
398            .collect::<String>();
399
400        // Replace multiple consecutive hyphens with a single hyphen
401        let text_clean = text_with_hyphens
402            .split('-')
403            .filter(|s| !s.is_empty())
404            .collect::<Vec<_>>()
405            .join("-");
406
407        // Remove leading and trailing hyphens
408        text_clean.trim_matches('-').to_string()
409    }
410
411    /// Check if a line is in front matter
412    pub fn is_in_front_matter(content: &str, line_number: usize) -> bool {
413        let lines: Vec<&str> = content.lines().collect();
414        if lines.is_empty() || line_number >= lines.len() {
415            return false;
416        }
417
418        // Check if the document starts with front matter
419        if !lines[0].trim_start().eq("---") {
420            return false;
421        }
422
423        let mut in_front_matter = true;
424        let mut found_closing = false;
425
426        // Skip the first line (opening delimiter)
427        for (i, line) in lines.iter().enumerate().skip(1) {
428            if i > line_number {
429                break;
430            }
431
432            if line.trim_start().eq("---") {
433                found_closing = true;
434                in_front_matter = i > line_number;
435                break;
436            }
437        }
438
439        in_front_matter && !found_closing
440    }
441}
442
443/// Checks if a line is a heading
444#[inline]
445pub fn is_heading(line: &str) -> bool {
446    // Fast path checks first
447    let trimmed = line.trim();
448    if trimmed.is_empty() {
449        return false;
450    }
451
452    if trimmed.starts_with('#') {
453        // Check for ATX heading
454        ATX_PATTERN.is_match(line)
455    } else {
456        // We can't tell for setext headings without looking at the next line
457        false
458    }
459}
460
461/// Checks if a line is a setext heading marker
462#[inline]
463pub fn is_setext_heading_marker(line: &str) -> bool {
464    SETEXT_HEADING_1.is_match(line) || SETEXT_HEADING_2.is_match(line)
465}
466
467/// Checks if a line is a setext heading by examining its next line
468#[inline]
469pub fn is_setext_heading(lines: &[&str], index: usize) -> bool {
470    if index >= lines.len() - 1 {
471        return false;
472    }
473
474    let current_line = lines[index];
475    let next_line = lines[index + 1];
476
477    // Skip if current line is empty
478    if current_line.trim().is_empty() {
479        return false;
480    }
481
482    // Check if next line is a setext heading marker with same indentation
483    let current_indentation = current_line
484        .chars()
485        .take_while(|c| c.is_whitespace())
486        .collect::<String>();
487
488    if let Some(captures) = SETEXT_HEADING_1.captures(next_line) {
489        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
490        return underline_indent == current_indentation;
491    }
492
493    if let Some(captures) = SETEXT_HEADING_2.captures(next_line) {
494        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
495        return underline_indent == current_indentation;
496    }
497
498    false
499}
500
501/// Get the heading level for a line
502#[inline]
503pub fn get_heading_level(lines: &[&str], index: usize) -> u32 {
504    if index >= lines.len() {
505        return 0;
506    }
507
508    let line = lines[index];
509
510    // Check for ATX style heading
511    if let Some(captures) = ATX_PATTERN.captures(line) {
512        let hashes = captures.get(2).map_or("", |m| m.as_str());
513        return hashes.len() as u32;
514    }
515
516    // Check for setext style heading
517    if index < lines.len() - 1 {
518        let next_line = lines[index + 1];
519
520        if SETEXT_HEADING_1.is_match(next_line) {
521            return 1;
522        }
523
524        if SETEXT_HEADING_2.is_match(next_line) {
525            return 2;
526        }
527    }
528
529    0
530}
531
532/// Extract the text content from a heading
533#[inline]
534pub fn extract_heading_text(lines: &[&str], index: usize) -> String {
535    if index >= lines.len() {
536        return String::new();
537    }
538
539    let line = lines[index];
540
541    // Extract from ATX heading
542    if let Some(captures) = ATX_PATTERN.captures(line) {
543        return captures.get(4).map_or("", |m| m.as_str()).trim().to_string();
544    }
545
546    // Extract from setext heading
547    if index < lines.len() - 1 {
548        let next_line = lines[index + 1];
549        let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
550
551        if let Some(captures) = SETEXT_HEADING_1.captures(next_line) {
552            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
553            if underline_indent == line_indentation {
554                return line[line_indentation.len()..].trim().to_string();
555            }
556        }
557
558        if let Some(captures) = SETEXT_HEADING_2.captures(next_line) {
559            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
560            if underline_indent == line_indentation {
561                return line[line_indentation.len()..].trim().to_string();
562            }
563        }
564    }
565
566    line.trim().to_string()
567}
568
/// Returns the length in bytes of the leading whitespace of `lines[index]`, or `0` when
/// `index` is out of range.
#[inline]
pub fn get_heading_indentation(lines: &[&str], index: usize) -> usize {
    lines
        .get(index)
        .map_or(0, |line| line.len() - line.trim_start().len())
}
579
580/// Check if a line is a code block delimiter
581#[inline]
582pub fn is_code_block_delimiter(line: &str) -> bool {
583    FENCED_CODE_BLOCK_START.is_match(line) || FENCED_CODE_BLOCK_END.is_match(line)
584}
585
586/// Check if a line is a front matter delimiter
587#[inline]
588pub fn is_front_matter_delimiter(line: &str) -> bool {
589    FRONT_MATTER_DELIMITER.is_match(line)
590}
591
/// Removes a trailing run of `#` characters from `text`.
///
/// Trailing whitespace is dropped first, then every `#` at the end of the text, then any
/// whitespace that preceded those hashes. Text that does not end in `#` is returned with
/// only its trailing whitespace removed.
///
/// The hashes are stripped whether or not they are separated from the text by whitespace,
/// so `"Heading#"` becomes `"Heading"`.
#[inline]
pub fn remove_trailing_hashes(text: &str) -> String {
    // `str` trimming works on character boundaries, so non-ASCII text is handled correctly.
    // Mixing byte offsets with character indices gives wrong results or a slicing panic.
    text.trim_end()
        .trim_end_matches('#')
        .trim_end()
        .to_string()
}
623
624/// Normalize a heading to the specified level
625#[inline]
626pub fn normalize_heading(line: &str, level: u32) -> String {
627    let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
628    let trimmed = line.trim_start();
629
630    if trimmed.starts_with('#') {
631        if let Some(text) = HeadingUtils::get_heading_text(line) {
632            format!("{}{} {}", indentation, "#".repeat(level as usize), text)
633        } else {
634            line.to_string()
635        }
636    } else {
637        format!("{}{} {}", indentation, "#".repeat(level as usize), trimmed)
638    }
639}
640
641#[cfg(test)]
642mod tests {
643    use super::*;
644
645    #[test]
646    fn test_atx_heading_parsing() {
647        let content = "# Heading 1\n## Heading 2\n### Heading 3";
648        assert!(HeadingUtils::parse_heading(content, 1).is_some());
649        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
650        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().level, 2);
651        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 3);
652    }
653
654    #[test]
655    fn test_setext_heading_parsing() {
656        let content = "Heading 1\n=========\nHeading 2\n---------";
657        assert!(HeadingUtils::parse_heading(content, 1).is_some());
658        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
659        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 2);
660    }
661
662    #[test]
663    fn test_heading_style_conversion() {
664        assert_eq!(
665            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Atx),
666            "# Heading 1"
667        );
668        assert_eq!(
669            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::AtxClosed),
670            "## Heading 2 ##"
671        );
672        assert_eq!(
673            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Setext1),
674            "Heading 1\n========="
675        );
676        assert_eq!(
677            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::Setext2),
678            "Heading 2\n---------"
679        );
680    }
681
682    #[test]
683    fn test_code_block_detection() {
684        let content = "# Heading\n```\n# Not a heading\n```\n# Another heading";
685        assert!(!HeadingUtils::is_in_code_block(content, 0));
686        assert!(HeadingUtils::is_in_code_block(content, 2));
687        assert!(!HeadingUtils::is_in_code_block(content, 4));
688    }
689
690    #[test]
691    fn test_empty_line_with_dashes() {
692        // Test that an empty line followed by dashes is not considered a heading
693        let content = "\n---";
694
695        // Empty line is at index 0, dashes at index 1
696        assert_eq!(
697            HeadingUtils::parse_heading(content, 1),
698            None,
699            "Empty line followed by dashes should not be detected as a heading"
700        );
701
702        // Also test with a regular horizontal rule
703        let content2 = "Some content\n\n---\nMore content";
704        assert_eq!(
705            HeadingUtils::parse_heading(content2, 2),
706            None,
707            "Empty line followed by horizontal rule should not be detected as a heading"
708        );
709    }
710
711    #[test]
712    fn test_is_atx_heading() {
713        assert!(HeadingUtils::is_atx_heading("# Heading"));
714        assert!(HeadingUtils::is_atx_heading("## Heading"));
715        assert!(HeadingUtils::is_atx_heading("### Heading"));
716        assert!(HeadingUtils::is_atx_heading("#### Heading"));
717        assert!(HeadingUtils::is_atx_heading("##### Heading"));
718        assert!(HeadingUtils::is_atx_heading("###### Heading"));
719        assert!(HeadingUtils::is_atx_heading("  # Indented"));
720        assert!(HeadingUtils::is_atx_heading("# Heading #"));
721        assert!(HeadingUtils::is_atx_heading("## Heading ###"));
722
723        assert!(!HeadingUtils::is_atx_heading("####### Too many"));
724        assert!(!HeadingUtils::is_atx_heading("Not a heading"));
725        assert!(HeadingUtils::is_atx_heading("#")); // Single # is a valid heading
726        assert!(!HeadingUtils::is_atx_heading(""));
727    }
728
729    #[test]
730    fn test_heading_edge_cases() {
731        // Test invalid line numbers
732        let content = "# Heading";
733        assert!(HeadingUtils::parse_heading(content, 0).is_none());
734        assert!(HeadingUtils::parse_heading(content, 10).is_none());
735
736        // Test headings in code blocks
737        let content = "```\n# Not a heading\n```";
738        assert!(HeadingUtils::parse_heading(content, 2).is_none());
739
740        // Test with tildes for code blocks
741        let content = "~~~\n# Not a heading\n~~~";
742        assert!(HeadingUtils::is_in_code_block(content, 2));
743
744        // Test mixed fence characters
745        let content = "```\n# Content\n~~~"; // Mismatched fences
746        assert!(HeadingUtils::is_in_code_block(content, 2));
747    }
748
749    #[test]
750    fn test_atx_closed_heading_variations() {
751        let content = "# Heading #\n## Heading ##\n### Heading ####\n#### Heading ##";
752        let h1 = HeadingUtils::parse_heading(content, 1).unwrap();
753        assert_eq!(h1.style, HeadingStyle::AtxClosed);
754        assert_eq!(h1.text, "Heading");
755
756        let h2 = HeadingUtils::parse_heading(content, 2).unwrap();
757        assert_eq!(h2.style, HeadingStyle::AtxClosed);
758
759        // Mismatched closing hashes - still ATX but not closed
760        let h3 = HeadingUtils::parse_heading(content, 3).unwrap();
761        assert_eq!(h3.style, HeadingStyle::Atx);
762
763        let h4 = HeadingUtils::parse_heading(content, 4).unwrap();
764        assert_eq!(h4.style, HeadingStyle::Atx);
765    }
766
767    #[test]
768    fn test_setext_heading_edge_cases() {
769        // List item followed by dashes should not be a heading
770        let content = "- List item\n---------";
771        assert!(HeadingUtils::parse_heading(content, 1).is_none());
772
773        // Front matter should not be a heading
774        let content = "---\ntitle: test\n---";
775        assert!(HeadingUtils::parse_heading(content, 1).is_none());
776
777        // Indented setext headings
778        let content = "  Indented\n  ========";
779        let heading = HeadingUtils::parse_heading(content, 1).unwrap();
780        assert_eq!(heading.indentation, "  ");
781        assert_eq!(heading.text, "Indented");
782
783        // Mismatched indentation should not be a heading
784        let content = "  Text\n========"; // No indent on underline
785        assert!(HeadingUtils::parse_heading(content, 1).is_none());
786    }
787
788    #[test]
789    fn test_get_indentation() {
790        assert_eq!(HeadingUtils::get_indentation("# Heading"), 0);
791        assert_eq!(HeadingUtils::get_indentation("  # Heading"), 2);
792        assert_eq!(HeadingUtils::get_indentation("    # Heading"), 4);
793        assert_eq!(HeadingUtils::get_indentation("\t# Heading"), 1);
794        assert_eq!(HeadingUtils::get_indentation(""), 0);
795    }
796
797    #[test]
798    fn test_convert_heading_style_edge_cases() {
799        // Empty text
800        assert_eq!(HeadingUtils::convert_heading_style("", 1, HeadingStyle::Atx), "");
801        assert_eq!(HeadingUtils::convert_heading_style("   ", 1, HeadingStyle::Atx), "");
802
803        // Level clamping
804        assert_eq!(
805            HeadingUtils::convert_heading_style("Text", 0, HeadingStyle::Atx),
806            "# Text"
807        );
808        assert_eq!(
809            HeadingUtils::convert_heading_style("Text", 10, HeadingStyle::Atx),
810            "###### Text"
811        );
812
813        // Setext with level > 2 falls back to ATX
814        assert_eq!(
815            HeadingUtils::convert_heading_style("Text", 3, HeadingStyle::Setext1),
816            "### Text"
817        );
818
819        // Preserve indentation
820        assert_eq!(
821            HeadingUtils::convert_heading_style("  Text", 1, HeadingStyle::Atx),
822            "  # Text"
823        );
824
825        // Very short text for setext
826        assert_eq!(
827            HeadingUtils::convert_heading_style("Hi", 1, HeadingStyle::Setext1),
828            "Hi\n=="
829        );
830    }
831
832    #[test]
833    fn test_get_heading_text() {
834        assert_eq!(HeadingUtils::get_heading_text("# Heading"), Some("Heading".to_string()));
835        assert_eq!(
836            HeadingUtils::get_heading_text("## Heading ##"),
837            Some("Heading".to_string())
838        );
839        assert_eq!(
840            HeadingUtils::get_heading_text("###   Spaces   "),
841            Some("Spaces".to_string())
842        );
843        assert_eq!(HeadingUtils::get_heading_text("Not a heading"), None);
844        assert_eq!(HeadingUtils::get_heading_text(""), None);
845    }
846
847    #[test]
848    fn test_emphasis_detection() {
849        assert!(HeadingUtils::is_emphasis_only_line("*emphasis*"));
850        assert!(HeadingUtils::is_emphasis_only_line("_emphasis_"));
851        assert!(HeadingUtils::is_emphasis_only_line("**strong**"));
852        assert!(HeadingUtils::is_emphasis_only_line("__strong__"));
853        assert!(HeadingUtils::is_emphasis_only_line("  *emphasis*  "));
854
855        assert!(!HeadingUtils::is_emphasis_only_line("*not* emphasis"));
856        assert!(!HeadingUtils::is_emphasis_only_line("text *emphasis*"));
857        assert!(!HeadingUtils::is_emphasis_only_line("**"));
858        assert!(!HeadingUtils::is_emphasis_only_line(""));
859    }
860
861    #[test]
862    fn test_extract_emphasis_text() {
863        assert_eq!(
864            HeadingUtils::extract_emphasis_text("*text*"),
865            Some(("text".to_string(), 1))
866        );
867        assert_eq!(
868            HeadingUtils::extract_emphasis_text("_text_"),
869            Some(("text".to_string(), 1))
870        );
871        assert_eq!(
872            HeadingUtils::extract_emphasis_text("**text**"),
873            Some(("text".to_string(), 2))
874        );
875        assert_eq!(
876            HeadingUtils::extract_emphasis_text("__text__"),
877            Some(("text".to_string(), 2))
878        );
879        assert_eq!(
880            HeadingUtils::extract_emphasis_text("  *spaced*  "),
881            Some(("spaced".to_string(), 1))
882        );
883
884        assert_eq!(HeadingUtils::extract_emphasis_text("not emphasis"), None);
885        assert_eq!(HeadingUtils::extract_emphasis_text("*not* complete"), None);
886    }
887
888    #[test]
889    fn test_convert_emphasis_to_heading() {
890        assert_eq!(
891            HeadingUtils::convert_emphasis_to_heading("*text*"),
892            Some("# text".to_string())
893        );
894        assert_eq!(
895            HeadingUtils::convert_emphasis_to_heading("**text**"),
896            Some("## text".to_string())
897        );
898        assert_eq!(
899            HeadingUtils::convert_emphasis_to_heading("  *text*"),
900            Some("  # text".to_string())
901        );
902        assert_eq!(
903            HeadingUtils::convert_emphasis_to_heading("*text* "),
904            Some("# text ".to_string())
905        );
906
907        assert_eq!(HeadingUtils::convert_emphasis_to_heading("not emphasis"), None);
908    }
909
910    #[test]
911    fn test_heading_to_fragment() {
912        assert_eq!(HeadingUtils::heading_to_fragment("Simple Heading"), "simple-heading");
913        assert_eq!(
914            HeadingUtils::heading_to_fragment("Heading with Numbers 123"),
915            "heading-with-numbers-123"
916        );
917        assert_eq!(
918            HeadingUtils::heading_to_fragment("Special!@#$%Characters"),
919            "special-characters"
920        );
921        assert_eq!(HeadingUtils::heading_to_fragment("  Trimmed  "), "trimmed");
922        assert_eq!(
923            HeadingUtils::heading_to_fragment("Multiple   Spaces"),
924            "multiple-spaces"
925        );
926        assert_eq!(
927            HeadingUtils::heading_to_fragment("Heading <em>with HTML</em>"),
928            "heading-with-html"
929        );
930        assert_eq!(
931            HeadingUtils::heading_to_fragment("---Leading-Dashes---"),
932            "leading-dashes"
933        );
934        assert_eq!(HeadingUtils::heading_to_fragment(""), "");
935    }
936
937    #[test]
938    fn test_is_in_front_matter() {
939        let content = "---\ntitle: Test\n---\n# Content";
940        assert!(HeadingUtils::is_in_front_matter(content, 1));
941        assert!(!HeadingUtils::is_in_front_matter(content, 2)); // Closing delimiter is not considered in front matter
942        assert!(!HeadingUtils::is_in_front_matter(content, 3));
943        assert!(!HeadingUtils::is_in_front_matter(content, 4));
944
945        // No front matter
946        let content = "# Just content";
947        assert!(!HeadingUtils::is_in_front_matter(content, 0));
948
949        // Unclosed front matter
950        let content = "---\ntitle: Test\n# No closing";
951        assert!(HeadingUtils::is_in_front_matter(content, 1));
952        assert!(HeadingUtils::is_in_front_matter(content, 2)); // Still in unclosed front matter
953
954        // Front matter not at start
955        let content = "# Heading\n---\ntitle: Test\n---";
956        assert!(!HeadingUtils::is_in_front_matter(content, 2));
957    }
958
959    #[test]
960    fn test_module_level_functions() {
961        // Test is_heading
962        assert!(is_heading("# Heading"));
963        assert!(is_heading("  ## Indented"));
964        assert!(!is_heading("Not a heading"));
965        assert!(!is_heading(""));
966
967        // Test is_setext_heading_marker
968        assert!(is_setext_heading_marker("========"));
969        assert!(is_setext_heading_marker("--------"));
970        assert!(is_setext_heading_marker("  ======"));
971        assert!(!is_setext_heading_marker("# Heading"));
972        assert!(is_setext_heading_marker("---")); // Three dashes is valid
973
974        // Test is_setext_heading
975        let lines = vec!["Title", "====="];
976        assert!(is_setext_heading(&lines, 0));
977
978        let lines = vec!["", "====="];
979        assert!(!is_setext_heading(&lines, 0));
980
981        // Test get_heading_level
982        let lines = vec!["# H1", "## H2", "### H3"];
983        assert_eq!(get_heading_level(&lines, 0), 1);
984        assert_eq!(get_heading_level(&lines, 1), 2);
985        assert_eq!(get_heading_level(&lines, 2), 3);
986        assert_eq!(get_heading_level(&lines, 10), 0);
987
988        // Test extract_heading_text
989        let lines = vec!["# Heading Text", "## Another ###"];
990        assert_eq!(extract_heading_text(&lines, 0), "Heading Text");
991        assert_eq!(extract_heading_text(&lines, 1), "Another");
992
993        // Test get_heading_indentation
994        let lines = vec!["# No indent", "  ## Two spaces", "    ### Four spaces"];
995        assert_eq!(get_heading_indentation(&lines, 0), 0);
996        assert_eq!(get_heading_indentation(&lines, 1), 2);
997        assert_eq!(get_heading_indentation(&lines, 2), 4);
998    }
999
1000    #[test]
1001    fn test_is_code_block_delimiter() {
1002        assert!(is_code_block_delimiter("```"));
1003        assert!(is_code_block_delimiter("~~~"));
1004        assert!(is_code_block_delimiter("````"));
1005        assert!(is_code_block_delimiter("```rust"));
1006        assert!(is_code_block_delimiter("  ```"));
1007
1008        assert!(!is_code_block_delimiter("``")); // Too short
1009        assert!(!is_code_block_delimiter("# Heading"));
1010    }
1011
1012    #[test]
1013    fn test_is_front_matter_delimiter() {
1014        assert!(is_front_matter_delimiter("---"));
1015        assert!(is_front_matter_delimiter("---  "));
1016
1017        assert!(!is_front_matter_delimiter("----"));
1018        assert!(!is_front_matter_delimiter("--"));
1019        assert!(!is_front_matter_delimiter("# ---"));
1020    }
1021
1022    #[test]
1023    fn test_remove_trailing_hashes() {
1024        assert_eq!(remove_trailing_hashes("Heading ###"), "Heading");
1025        assert_eq!(remove_trailing_hashes("Heading ## "), "Heading");
1026        assert_eq!(remove_trailing_hashes("Heading #not trailing"), "Heading #not trailing");
1027        assert_eq!(remove_trailing_hashes("No hashes"), "No hashes");
1028        assert_eq!(remove_trailing_hashes(""), "");
1029
1030        // Test the specific case that was failing
1031        assert_eq!(remove_trailing_hashes("Heading ##"), "Heading");
1032        assert_eq!(remove_trailing_hashes("Heading #"), "Heading");
1033        assert_eq!(remove_trailing_hashes("Heading ####"), "Heading");
1034
1035        // Edge cases
1036        assert_eq!(remove_trailing_hashes("#"), "");
1037        assert_eq!(remove_trailing_hashes("##"), "");
1038        assert_eq!(remove_trailing_hashes("###"), "");
1039        assert_eq!(remove_trailing_hashes("Text#"), "Text");
1040        assert_eq!(remove_trailing_hashes("Text ##"), "Text");
1041    }
1042
1043    #[test]
1044    fn test_normalize_heading() {
1045        assert_eq!(normalize_heading("# Old Level", 3), "### Old Level");
1046        assert_eq!(normalize_heading("## Heading ##", 1), "# Heading");
1047        assert_eq!(normalize_heading("  # Indented", 2), "  ## Indented");
1048        assert_eq!(normalize_heading("Plain text", 1), "# Plain text");
1049    }
1050
1051    #[test]
1052    fn test_heading_style_from_str() {
1053        assert_eq!(HeadingStyle::from_str("atx"), Ok(HeadingStyle::Atx));
1054        assert_eq!(HeadingStyle::from_str("ATX"), Ok(HeadingStyle::Atx));
1055        assert_eq!(HeadingStyle::from_str("atx_closed"), Ok(HeadingStyle::AtxClosed));
1056        assert_eq!(HeadingStyle::from_str("setext1"), Ok(HeadingStyle::Setext1));
1057        assert_eq!(HeadingStyle::from_str("setext"), Ok(HeadingStyle::Setext1));
1058        assert_eq!(HeadingStyle::from_str("setext2"), Ok(HeadingStyle::Setext2));
1059        assert_eq!(HeadingStyle::from_str("consistent"), Ok(HeadingStyle::Consistent));
1060        assert_eq!(HeadingStyle::from_str("invalid"), Err(()));
1061    }
1062
1063    #[test]
1064    fn test_heading_style_display() {
1065        assert_eq!(HeadingStyle::Atx.to_string(), "atx");
1066        assert_eq!(HeadingStyle::AtxClosed.to_string(), "atx_closed");
1067        assert_eq!(HeadingStyle::Setext1.to_string(), "setext1");
1068        assert_eq!(HeadingStyle::Setext2.to_string(), "setext2");
1069        assert_eq!(HeadingStyle::Consistent.to_string(), "consistent");
1070    }
1071
1072    #[test]
1073    fn test_unicode_headings() {
1074        let content = "# 你好世界\n## Ñoño\n### 🚀 Emoji";
1075        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().text, "你好世界");
1076        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().text, "Ñoño");
1077        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().text, "🚀 Emoji");
1078
1079        // Test fragment generation with unicode
1080        assert_eq!(HeadingUtils::heading_to_fragment("你好世界"), "你好世界");
1081        assert_eq!(HeadingUtils::heading_to_fragment("Café René"), "café-rené");
1082    }
1083
1084    #[test]
1085    fn test_complex_nested_structures() {
1086        // Code block inside front matter (edge case)
1087        // The function doesn't handle YAML multi-line strings, so ``` inside front matter
1088        // is treated as a code block start
1089        let content = "---\ncode: |\n  ```\n  # Not a heading\n  ```\n---\n# Real heading";
1090        assert!(HeadingUtils::is_in_code_block(content, 4)); // Inside code block
1091        assert!(HeadingUtils::parse_heading(content, 7).is_some());
1092
1093        // Multiple code blocks
1094        let content = "```\ncode\n```\n# Heading\n~~~\nmore code\n~~~";
1095        assert!(!HeadingUtils::is_in_code_block(content, 4));
1096        assert!(HeadingUtils::parse_heading(content, 4).is_some());
1097    }
1098}