Skip to main content

rumdl_lib/rules/
heading_utils.rs

1use crate::utils::regex_cache::get_cached_regex;
2use std::fmt;
3use std::str::FromStr;
4
// Regex patterns
/// ATX heading line. Groups: (1) leading whitespace, (2) the 1-6 opening `#`,
/// (3) whitespace after the opening hashes, (4) heading text (no `#`, no newline),
/// (5) optional closing run of 1-6 `#` that must be preceded by whitespace.
const ATX_PATTERN_STR: &str = r"^(\s*)(#{1,6})(\s*)([^#\n]*?)(?:\s+(#{1,6}))?\s*$";
/// Setext level-1 underline: a line made only of `=`. Group (1) is the leading whitespace.
const SETEXT_HEADING_1_STR: &str = r"^(\s*)(=+)(\s*)$";
/// Setext level-2 underline: a line made only of `-`. Group (1) is the leading whitespace.
const SETEXT_HEADING_2_STR: &str = r"^(\s*)(-+)(\s*)$";
/// Fence line of three or more backticks or tildes, optionally followed by any text.
const FENCED_CODE_BLOCK_START_STR: &str = r"^(\s*)(`{3,}|~{3,}).*$";
/// Fence line of three or more backticks or tildes followed only by whitespace.
const FENCED_CODE_BLOCK_END_STR: &str = r"^(\s*)(`{3,}|~{3,})\s*$";
/// A `---` line with optional trailing whitespace and no leading whitespace.
const FRONT_MATTER_DELIMITER_STR: &str = r"^---\s*$";
/// Anything between `<` and the next `>`; used to strip tags from heading text.
const HTML_TAG_REGEX_STR: &str = r"<[^>]*>";

// Single line emphasis patterns
/// Whole line is `*text*`; group (1) is the text between the markers.
const SINGLE_LINE_ASTERISK_EMPHASIS_STR: &str = r"^\s*\*([^*\n]+)\*\s*$";
/// Whole line is `_text_`; group (1) is the text between the markers.
const SINGLE_LINE_UNDERSCORE_EMPHASIS_STR: &str = r"^\s*_([^_\n]+)_\s*$";
/// Whole line is `**text**`; group (1) is the text between the markers.
const SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR: &str = r"^\s*\*\*([^*\n]+)\*\*\s*$";
/// Whole line is `__text__`; group (1) is the text between the markers.
const SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR: &str = r"^\s*__([^_\n]+)__\s*$";
19
/// Represents different styles of Markdown headings
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
pub enum HeadingStyle {
    /// ATX heading: `# Heading`
    Atx,
    /// Closed ATX heading: `# Heading #`
    AtxClosed,
    /// Setext heading: the text line followed by a `=======` underline
    Setext1,
    /// Setext heading: the text line followed by a `-------` underline
    Setext2,
    /// For maintaining consistency with the first found header style
    Consistent,
    /// Setext for h1/h2, ATX for h3-h6
    SetextWithAtx,
    /// Setext for h1/h2, ATX closed for h3-h6
    SetextWithAtxClosed,
}
33
34impl fmt::Display for HeadingStyle {
35    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36        let s = match self {
37            HeadingStyle::Atx => "atx",
38            HeadingStyle::AtxClosed => "atx-closed",
39            HeadingStyle::Setext1 => "setext1",
40            HeadingStyle::Setext2 => "setext2",
41            HeadingStyle::Consistent => "consistent",
42            HeadingStyle::SetextWithAtx => "setext-with-atx",
43            HeadingStyle::SetextWithAtxClosed => "setext-with-atx-closed",
44        };
45        write!(f, "{s}")
46    }
47}
48
49impl FromStr for HeadingStyle {
50    type Err = ();
51    fn from_str(s: &str) -> Result<Self, Self::Err> {
52        let normalized = s.trim().to_ascii_lowercase().replace('-', "_");
53        match normalized.as_str() {
54            "atx" => Ok(HeadingStyle::Atx),
55            "atx_closed" => Ok(HeadingStyle::AtxClosed),
56            "setext1" | "setext" => Ok(HeadingStyle::Setext1),
57            "setext2" => Ok(HeadingStyle::Setext2),
58            "consistent" => Ok(HeadingStyle::Consistent),
59            "setext_with_atx" => Ok(HeadingStyle::SetextWithAtx),
60            "setext_with_atx_closed" => Ok(HeadingStyle::SetextWithAtxClosed),
61            _ => Err(()),
62        }
63    }
64}
65
/// Represents a heading in a Markdown document
#[derive(Debug, Clone, PartialEq)]
pub struct Heading {
    /// Heading text: the ATX text capture, or the setext text line without its
    /// leading whitespace.
    pub text: String,
    /// Heading level: number of opening `#` for ATX, 1 (`=`) or 2 (`-`) for setext.
    pub level: u32,
    /// Style the heading was written in.
    pub style: HeadingStyle,
    /// Line number of the heading as passed to `HeadingUtils::parse_heading`
    /// (1-based there).
    pub line_number: usize,
    /// The raw source line; for setext headings the text line and the underline
    /// joined with `\n`.
    pub original_text: String,
    /// Leading whitespace of the heading line.
    pub indentation: String,
}
76
/// Utility functions for working with Markdown headings
///
/// Unit struct used purely as a namespace: every function in its `impl` block
/// is an associated function that takes no `self`.
pub struct HeadingUtils;
79
80impl HeadingUtils {
81    /// Check if a line is an ATX heading (starts with #)
82    pub fn is_atx_heading(line: &str) -> bool {
83        get_cached_regex(ATX_PATTERN_STR)
84            .map(|re| re.is_match(line))
85            .unwrap_or(false)
86    }
87
88    /// Check if a line is inside a code block
89    pub fn is_in_code_block(content: &str, line_number: usize) -> bool {
90        let mut in_code_block = false;
91        let mut fence_char = None;
92        let mut line_count = 0;
93
94        for line in content.lines() {
95            line_count += 1;
96            if line_count > line_number {
97                break;
98            }
99
100            let trimmed = line.trim();
101            if trimmed.len() >= 3 {
102                let first_chars: Vec<char> = trimmed.chars().take(3).collect();
103                if first_chars.iter().all(|&c| c == '`' || c == '~') {
104                    if let Some(current_fence) = fence_char {
105                        if first_chars[0] == current_fence && first_chars.iter().all(|&c| c == current_fence) {
106                            in_code_block = false;
107                            fence_char = None;
108                        }
109                    } else {
110                        in_code_block = true;
111                        fence_char = Some(first_chars[0]);
112                    }
113                }
114            }
115        }
116
117        in_code_block
118    }
119
120    /// Parse a line into a Heading struct if it's a valid heading
121    pub fn parse_heading(content: &str, line_num: usize) -> Option<Heading> {
122        let lines: Vec<&str> = content.lines().collect();
123        if line_num == 0 || line_num > lines.len() {
124            return None;
125        }
126
127        let line = lines[line_num - 1];
128
129        // Skip if line is within a code block
130        if Self::is_in_code_block(content, line_num) {
131            return None;
132        }
133
134        // Check for ATX style headings
135        if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
136            let indentation = captures.get(1).map_or("", |m| m.as_str()).to_string();
137            let opening_hashes = captures.get(2).map_or("", |m| m.as_str());
138            let level = opening_hashes.len() as u32;
139            let text = captures.get(4).map_or("", |m| m.as_str()).to_string();
140
141            let style = if let Some(closing) = captures.get(5) {
142                let closing_hashes = closing.as_str();
143                if closing_hashes.len() == opening_hashes.len() {
144                    HeadingStyle::AtxClosed
145                } else {
146                    HeadingStyle::Atx
147                }
148            } else {
149                HeadingStyle::Atx
150            };
151
152            let heading = Heading {
153                text: text.clone(),
154                level,
155                style,
156                line_number: line_num,
157                original_text: line.to_string(),
158                indentation: indentation.clone(),
159            };
160            return Some(heading);
161        }
162
163        // Check for Setext style headings
164        if line_num < lines.len() {
165            let next_line = lines[line_num];
166            let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
167
168            // Skip empty lines - don't consider them as potential Setext headings
169            if line.trim().is_empty() {
170                return None;
171            }
172
173            // Skip list items - they shouldn't be considered as potential Setext headings
174            if line.trim_start().starts_with('-')
175                || line.trim_start().starts_with('*')
176                || line.trim_start().starts_with('+')
177                || line.trim_start().starts_with("1.")
178            {
179                return None;
180            }
181
182            // Skip front matter delimiters or lines within front matter
183            if line.trim() == "---" || Self::is_in_front_matter(content, line_num - 1) {
184                return None;
185            }
186
187            if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
188                .ok()
189                .and_then(|re| re.captures(next_line))
190            {
191                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
192                if underline_indent == line_indentation {
193                    let heading = Heading {
194                        text: line[line_indentation.len()..].to_string(),
195                        level: 1,
196                        style: HeadingStyle::Setext1,
197                        line_number: line_num,
198                        original_text: format!("{line}\n{next_line}"),
199                        indentation: line_indentation.clone(),
200                    };
201                    return Some(heading);
202                }
203            } else if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
204                .ok()
205                .and_then(|re| re.captures(next_line))
206            {
207                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
208                if underline_indent == line_indentation {
209                    let heading = Heading {
210                        text: line[line_indentation.len()..].to_string(),
211                        level: 2,
212                        style: HeadingStyle::Setext2,
213                        line_number: line_num,
214                        original_text: format!("{line}\n{next_line}"),
215                        indentation: line_indentation.clone(),
216                    };
217                    return Some(heading);
218                }
219            }
220        }
221
222        None
223    }
224
225    /// Get the indentation level of a line
226    pub fn get_indentation(line: &str) -> usize {
227        line.len() - line.trim_start().len()
228    }
229
230    /// Convert a heading to a different style
231    pub fn convert_heading_style(text_content: &str, level: u32, style: HeadingStyle) -> String {
232        if text_content.trim().is_empty() {
233            return String::new();
234        }
235
236        // Validate heading level
237        let level = level.clamp(1, 6);
238        let indentation = text_content
239            .chars()
240            .take_while(|c| c.is_whitespace())
241            .collect::<String>();
242        let text_content = text_content.trim();
243
244        match style {
245            HeadingStyle::Atx => {
246                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
247            }
248            HeadingStyle::AtxClosed => {
249                format!(
250                    "{}{} {} {}",
251                    indentation,
252                    "#".repeat(level as usize),
253                    text_content,
254                    "#".repeat(level as usize)
255                )
256            }
257            HeadingStyle::Setext1 | HeadingStyle::Setext2 => {
258                if level > 2 {
259                    // Fall back to ATX style for levels > 2
260                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
261                } else {
262                    let underline_char = if level == 1 || style == HeadingStyle::Setext1 {
263                        '='
264                    } else {
265                        '-'
266                    };
267                    let visible_length = text_content.chars().count();
268                    let underline_length = visible_length.max(1); // Ensure at least 1 underline char
269                    format!(
270                        "{}{}\n{}{}",
271                        indentation,
272                        text_content,
273                        indentation,
274                        underline_char.to_string().repeat(underline_length)
275                    )
276                }
277            }
278            HeadingStyle::Consistent => {
279                // For Consistent style, default to ATX as it's the most commonly used
280                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
281            }
282            HeadingStyle::SetextWithAtx => {
283                if level <= 2 {
284                    // Use Setext for h1/h2
285                    let underline_char = if level == 1 { '=' } else { '-' };
286                    let visible_length = text_content.chars().count();
287                    let underline_length = visible_length.max(1);
288                    format!(
289                        "{}{}\n{}{}",
290                        indentation,
291                        text_content,
292                        indentation,
293                        underline_char.to_string().repeat(underline_length)
294                    )
295                } else {
296                    // Use ATX for h3-h6
297                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
298                }
299            }
300            HeadingStyle::SetextWithAtxClosed => {
301                if level <= 2 {
302                    // Use Setext for h1/h2
303                    let underline_char = if level == 1 { '=' } else { '-' };
304                    let visible_length = text_content.chars().count();
305                    let underline_length = visible_length.max(1);
306                    format!(
307                        "{}{}\n{}{}",
308                        indentation,
309                        text_content,
310                        indentation,
311                        underline_char.to_string().repeat(underline_length)
312                    )
313                } else {
314                    // Use ATX closed for h3-h6
315                    format!(
316                        "{}{} {} {}",
317                        indentation,
318                        "#".repeat(level as usize),
319                        text_content,
320                        "#".repeat(level as usize)
321                    )
322                }
323            }
324        }
325    }
326
327    /// Get the text content of a heading line
328    pub fn get_heading_text(line: &str) -> Option<String> {
329        get_cached_regex(ATX_PATTERN_STR)
330            .ok()
331            .and_then(|re| re.captures(line))
332            .map(|captures| captures.get(4).map_or("", |m| m.as_str()).trim().to_string())
333    }
334
335    /// Detect emphasis-only lines
336    pub fn is_emphasis_only_line(line: &str) -> bool {
337        let trimmed = line.trim();
338        get_cached_regex(SINGLE_LINE_ASTERISK_EMPHASIS_STR)
339            .map(|re| re.is_match(trimmed))
340            .unwrap_or(false)
341            || get_cached_regex(SINGLE_LINE_UNDERSCORE_EMPHASIS_STR)
342                .map(|re| re.is_match(trimmed))
343                .unwrap_or(false)
344            || get_cached_regex(SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR)
345                .map(|re| re.is_match(trimmed))
346                .unwrap_or(false)
347            || get_cached_regex(SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR)
348                .map(|re| re.is_match(trimmed))
349                .unwrap_or(false)
350    }
351
352    /// Extract text from an emphasis-only line
353    pub fn extract_emphasis_text(line: &str) -> Option<(String, u32)> {
354        let trimmed = line.trim();
355
356        if let Some(caps) = get_cached_regex(SINGLE_LINE_ASTERISK_EMPHASIS_STR)
357            .ok()
358            .and_then(|re| re.captures(trimmed))
359        {
360            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
361        }
362
363        if let Some(caps) = get_cached_regex(SINGLE_LINE_UNDERSCORE_EMPHASIS_STR)
364            .ok()
365            .and_then(|re| re.captures(trimmed))
366        {
367            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
368        }
369
370        if let Some(caps) = get_cached_regex(SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR)
371            .ok()
372            .and_then(|re| re.captures(trimmed))
373        {
374            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
375        }
376
377        if let Some(caps) = get_cached_regex(SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR)
378            .ok()
379            .and_then(|re| re.captures(trimmed))
380        {
381            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
382        }
383
384        None
385    }
386
387    /// Convert emphasis to heading
388    pub fn convert_emphasis_to_heading(line: &str) -> Option<String> {
389        // Preserve the original indentation
390        let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
391        // Preserve trailing spaces at the end of the line
392        let trailing = if line.ends_with(" ") {
393            line.chars().rev().take_while(|c| c.is_whitespace()).collect::<String>()
394        } else {
395            String::new()
396        };
397
398        if let Some((text, level)) = Self::extract_emphasis_text(line) {
399            // Preserve the original indentation and trailing spaces
400            Some(format!(
401                "{}{} {}{}",
402                indentation,
403                "#".repeat(level as usize),
404                text,
405                trailing
406            ))
407        } else {
408            None
409        }
410    }
411
412    /// Convert a heading text to a valid ID for fragment links
413    pub fn heading_to_fragment(text: &str) -> String {
414        // Remove any HTML tags
415        let text_no_html = get_cached_regex(HTML_TAG_REGEX_STR)
416            .map(|re| re.replace_all(text, ""))
417            .unwrap_or_else(|_| text.into());
418
419        // Convert to lowercase and trim
420        let text_lower = text_no_html.trim().to_lowercase();
421
422        // Replace spaces and punctuation with hyphens
423        let text_with_hyphens = text_lower
424            .chars()
425            .map(|c| if c.is_alphanumeric() { c } else { '-' })
426            .collect::<String>();
427
428        // Replace multiple consecutive hyphens with a single hyphen
429        let text_clean = text_with_hyphens
430            .split('-')
431            .filter(|s| !s.is_empty())
432            .collect::<Vec<_>>()
433            .join("-");
434
435        // Remove leading and trailing hyphens
436        text_clean.trim_matches('-').to_string()
437    }
438
439    /// Check if a line is in front matter
440    pub fn is_in_front_matter(content: &str, line_number: usize) -> bool {
441        let lines: Vec<&str> = content.lines().collect();
442        if lines.is_empty() || line_number >= lines.len() {
443            return false;
444        }
445
446        // Check if the document starts with front matter
447        if !lines[0].trim_start().eq("---") {
448            return false;
449        }
450
451        let mut in_front_matter = true;
452        let mut found_closing = false;
453
454        // Skip the first line (opening delimiter)
455        for (i, line) in lines.iter().enumerate().skip(1) {
456            if i > line_number {
457                break;
458            }
459
460            if line.trim_start().eq("---") {
461                found_closing = true;
462                in_front_matter = i > line_number;
463                break;
464            }
465        }
466
467        in_front_matter && !found_closing
468    }
469}
470
471/// Checks if a line is a heading
472#[inline]
473pub fn is_heading(line: &str) -> bool {
474    // Fast path checks first
475    let trimmed = line.trim();
476    if trimmed.is_empty() {
477        return false;
478    }
479
480    if trimmed.starts_with('#') {
481        // Check for ATX heading
482        get_cached_regex(ATX_PATTERN_STR)
483            .map(|re| re.is_match(line))
484            .unwrap_or(false)
485    } else {
486        // We can't tell for setext headings without looking at the next line
487        false
488    }
489}
490
491/// Checks if a line is a setext heading marker
492#[inline]
493pub fn is_setext_heading_marker(line: &str) -> bool {
494    get_cached_regex(SETEXT_HEADING_1_STR)
495        .map(|re| re.is_match(line))
496        .unwrap_or(false)
497        || get_cached_regex(SETEXT_HEADING_2_STR)
498            .map(|re| re.is_match(line))
499            .unwrap_or(false)
500}
501
502/// Checks if a line is a setext heading by examining its next line
503#[inline]
504pub fn is_setext_heading(lines: &[&str], index: usize) -> bool {
505    if index >= lines.len() - 1 {
506        return false;
507    }
508
509    let current_line = lines[index];
510    let next_line = lines[index + 1];
511
512    // Skip if current line is empty
513    if current_line.trim().is_empty() {
514        return false;
515    }
516
517    // Check if next line is a setext heading marker with same indentation
518    let current_indentation = current_line
519        .chars()
520        .take_while(|c| c.is_whitespace())
521        .collect::<String>();
522
523    if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
524        .ok()
525        .and_then(|re| re.captures(next_line))
526    {
527        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
528        return underline_indent == current_indentation;
529    }
530
531    if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
532        .ok()
533        .and_then(|re| re.captures(next_line))
534    {
535        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
536        return underline_indent == current_indentation;
537    }
538
539    false
540}
541
542/// Get the heading level for a line
543#[inline]
544pub fn get_heading_level(lines: &[&str], index: usize) -> u32 {
545    if index >= lines.len() {
546        return 0;
547    }
548
549    let line = lines[index];
550
551    // Check for ATX style heading
552    if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
553        let hashes = captures.get(2).map_or("", |m| m.as_str());
554        return hashes.len() as u32;
555    }
556
557    // Check for setext style heading
558    if index < lines.len() - 1 {
559        let next_line = lines[index + 1];
560
561        if get_cached_regex(SETEXT_HEADING_1_STR)
562            .map(|re| re.is_match(next_line))
563            .unwrap_or(false)
564        {
565            return 1;
566        }
567
568        if get_cached_regex(SETEXT_HEADING_2_STR)
569            .map(|re| re.is_match(next_line))
570            .unwrap_or(false)
571        {
572            return 2;
573        }
574    }
575
576    0
577}
578
579/// Extract the text content from a heading
580#[inline]
581pub fn extract_heading_text(lines: &[&str], index: usize) -> String {
582    if index >= lines.len() {
583        return String::new();
584    }
585
586    let line = lines[index];
587
588    // Extract from ATX heading
589    if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
590        return captures.get(4).map_or("", |m| m.as_str()).trim().to_string();
591    }
592
593    // Extract from setext heading
594    if index < lines.len() - 1 {
595        let next_line = lines[index + 1];
596        let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
597
598        if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
599            .ok()
600            .and_then(|re| re.captures(next_line))
601        {
602            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
603            if underline_indent == line_indentation {
604                return line[line_indentation.len()..].trim().to_string();
605            }
606        }
607
608        if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
609            .ok()
610            .and_then(|re| re.captures(next_line))
611        {
612            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
613            if underline_indent == line_indentation {
614                return line[line_indentation.len()..].trim().to_string();
615            }
616        }
617    }
618
619    line.trim().to_string()
620}
621
/// Returns the size in bytes of the leading whitespace of `lines[index]`,
/// or 0 when `index` is out of range.
#[inline]
pub fn get_heading_indentation(lines: &[&str], index: usize) -> usize {
    lines
        .get(index)
        .map_or(0, |line| line.len() - line.trim_start().len())
}
632
633/// Check if a line is a code block delimiter
634#[inline]
635pub fn is_code_block_delimiter(line: &str) -> bool {
636    get_cached_regex(FENCED_CODE_BLOCK_START_STR)
637        .map(|re| re.is_match(line))
638        .unwrap_or(false)
639        || get_cached_regex(FENCED_CODE_BLOCK_END_STR)
640            .map(|re| re.is_match(line))
641            .unwrap_or(false)
642}
643
644/// Check if a line is a front matter delimiter
645#[inline]
646pub fn is_front_matter_delimiter(line: &str) -> bool {
647    get_cached_regex(FRONT_MATTER_DELIMITER_STR)
648        .map(|re| re.is_match(line))
649        .unwrap_or(false)
650}
651
/// Remove a trailing run of `#` characters from heading text.
///
/// Trailing whitespace is dropped first; if the text then ends with one or
/// more `#`, that run and any whitespace in front of it are removed as well.
/// Hashes elsewhere in the text are left untouched.
///
/// Works on string slices only, so non-ASCII text is handled correctly (the
/// earlier mix of byte and char indices left hashes behind after multi-byte
/// text).
#[inline]
pub fn remove_trailing_hashes(text: &str) -> String {
    text.trim_end()
        .trim_end_matches('#')
        .trim_end()
        .to_string()
}
683
684/// Normalize a heading to the specified level
685#[inline]
686pub fn normalize_heading(line: &str, level: u32) -> String {
687    let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
688    let trimmed = line.trim_start();
689
690    if trimmed.starts_with('#') {
691        if let Some(text) = HeadingUtils::get_heading_text(line) {
692            format!("{}{} {}", indentation, "#".repeat(level as usize), text)
693        } else {
694            line.to_string()
695        }
696    } else {
697        format!("{}{} {}", indentation, "#".repeat(level as usize), trimmed)
698    }
699}
700
701#[cfg(test)]
702mod tests {
703    use super::*;
704
705    #[test]
706    fn test_atx_heading_parsing() {
707        let content = "# Heading 1\n## Heading 2\n### Heading 3";
708        assert!(HeadingUtils::parse_heading(content, 1).is_some());
709        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
710        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().level, 2);
711        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 3);
712    }
713
714    #[test]
715    fn test_setext_heading_parsing() {
716        let content = "Heading 1\n=========\nHeading 2\n---------";
717        assert!(HeadingUtils::parse_heading(content, 1).is_some());
718        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
719        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 2);
720    }
721
722    #[test]
723    fn test_heading_style_conversion() {
724        assert_eq!(
725            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Atx),
726            "# Heading 1"
727        );
728        assert_eq!(
729            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::AtxClosed),
730            "## Heading 2 ##"
731        );
732        assert_eq!(
733            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Setext1),
734            "Heading 1\n========="
735        );
736        assert_eq!(
737            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::Setext2),
738            "Heading 2\n---------"
739        );
740    }
741
742    #[test]
743    fn test_code_block_detection() {
744        let content = "# Heading\n```\n# Not a heading\n```\n# Another heading";
745        assert!(!HeadingUtils::is_in_code_block(content, 0));
746        assert!(HeadingUtils::is_in_code_block(content, 2));
747        assert!(!HeadingUtils::is_in_code_block(content, 4));
748    }
749
750    #[test]
751    fn test_empty_line_with_dashes() {
752        // Test that an empty line followed by dashes is not considered a heading
753        let content = "\n---";
754
755        // Empty line is at index 0, dashes at index 1
756        assert_eq!(
757            HeadingUtils::parse_heading(content, 1),
758            None,
759            "Empty line followed by dashes should not be detected as a heading"
760        );
761
762        // Also test with a regular horizontal rule
763        let content2 = "Some content\n\n---\nMore content";
764        assert_eq!(
765            HeadingUtils::parse_heading(content2, 2),
766            None,
767            "Empty line followed by horizontal rule should not be detected as a heading"
768        );
769    }
770
771    #[test]
772    fn test_is_atx_heading() {
773        assert!(HeadingUtils::is_atx_heading("# Heading"));
774        assert!(HeadingUtils::is_atx_heading("## Heading"));
775        assert!(HeadingUtils::is_atx_heading("### Heading"));
776        assert!(HeadingUtils::is_atx_heading("#### Heading"));
777        assert!(HeadingUtils::is_atx_heading("##### Heading"));
778        assert!(HeadingUtils::is_atx_heading("###### Heading"));
779        assert!(HeadingUtils::is_atx_heading("  # Indented"));
780        assert!(HeadingUtils::is_atx_heading("# Heading #"));
781        assert!(HeadingUtils::is_atx_heading("## Heading ###"));
782
783        assert!(!HeadingUtils::is_atx_heading("####### Too many"));
784        assert!(!HeadingUtils::is_atx_heading("Not a heading"));
785        assert!(HeadingUtils::is_atx_heading("#")); // Single # is a valid heading
786        assert!(!HeadingUtils::is_atx_heading(""));
787    }
788
789    #[test]
790    fn test_heading_edge_cases() {
791        // Test invalid line numbers
792        let content = "# Heading";
793        assert!(HeadingUtils::parse_heading(content, 0).is_none());
794        assert!(HeadingUtils::parse_heading(content, 10).is_none());
795
796        // Test headings in code blocks
797        let content = "```\n# Not a heading\n```";
798        assert!(HeadingUtils::parse_heading(content, 2).is_none());
799
800        // Test with tildes for code blocks
801        let content = "~~~\n# Not a heading\n~~~";
802        assert!(HeadingUtils::is_in_code_block(content, 2));
803
804        // Test mixed fence characters
805        let content = "```\n# Content\n~~~"; // Mismatched fences
806        assert!(HeadingUtils::is_in_code_block(content, 2));
807    }
808
809    #[test]
810    fn test_atx_closed_heading_variations() {
811        let content = "# Heading #\n## Heading ##\n### Heading ####\n#### Heading ##";
812        let h1 = HeadingUtils::parse_heading(content, 1).unwrap();
813        assert_eq!(h1.style, HeadingStyle::AtxClosed);
814        assert_eq!(h1.text, "Heading");
815
816        let h2 = HeadingUtils::parse_heading(content, 2).unwrap();
817        assert_eq!(h2.style, HeadingStyle::AtxClosed);
818
819        // Mismatched closing hashes - still ATX but not closed
820        let h3 = HeadingUtils::parse_heading(content, 3).unwrap();
821        assert_eq!(h3.style, HeadingStyle::Atx);
822
823        let h4 = HeadingUtils::parse_heading(content, 4).unwrap();
824        assert_eq!(h4.style, HeadingStyle::Atx);
825    }
826
827    #[test]
828    fn test_setext_heading_edge_cases() {
829        // List item followed by dashes should not be a heading
830        let content = "- List item\n---------";
831        assert!(HeadingUtils::parse_heading(content, 1).is_none());
832
833        // Front matter should not be a heading
834        let content = "---\ntitle: test\n---";
835        assert!(HeadingUtils::parse_heading(content, 1).is_none());
836
837        // Indented setext headings
838        let content = "  Indented\n  ========";
839        let heading = HeadingUtils::parse_heading(content, 1).unwrap();
840        assert_eq!(heading.indentation, "  ");
841        assert_eq!(heading.text, "Indented");
842
843        // Mismatched indentation should not be a heading
844        let content = "  Text\n========"; // No indent on underline
845        assert!(HeadingUtils::parse_heading(content, 1).is_none());
846    }
847
848    #[test]
849    fn test_get_indentation() {
850        assert_eq!(HeadingUtils::get_indentation("# Heading"), 0);
851        assert_eq!(HeadingUtils::get_indentation("  # Heading"), 2);
852        assert_eq!(HeadingUtils::get_indentation("    # Heading"), 4);
853        assert_eq!(HeadingUtils::get_indentation("\t# Heading"), 1);
854        assert_eq!(HeadingUtils::get_indentation(""), 0);
855    }
856
857    #[test]
858    fn test_convert_heading_style_edge_cases() {
859        // Empty text
860        assert_eq!(HeadingUtils::convert_heading_style("", 1, HeadingStyle::Atx), "");
861        assert_eq!(HeadingUtils::convert_heading_style("   ", 1, HeadingStyle::Atx), "");
862
863        // Level clamping
864        assert_eq!(
865            HeadingUtils::convert_heading_style("Text", 0, HeadingStyle::Atx),
866            "# Text"
867        );
868        assert_eq!(
869            HeadingUtils::convert_heading_style("Text", 10, HeadingStyle::Atx),
870            "###### Text"
871        );
872
873        // Setext with level > 2 falls back to ATX
874        assert_eq!(
875            HeadingUtils::convert_heading_style("Text", 3, HeadingStyle::Setext1),
876            "### Text"
877        );
878
879        // Preserve indentation
880        assert_eq!(
881            HeadingUtils::convert_heading_style("  Text", 1, HeadingStyle::Atx),
882            "  # Text"
883        );
884
885        // Very short text for setext
886        assert_eq!(
887            HeadingUtils::convert_heading_style("Hi", 1, HeadingStyle::Setext1),
888            "Hi\n=="
889        );
890    }
891
892    #[test]
893    fn test_get_heading_text() {
894        assert_eq!(HeadingUtils::get_heading_text("# Heading"), Some("Heading".to_string()));
895        assert_eq!(
896            HeadingUtils::get_heading_text("## Heading ##"),
897            Some("Heading".to_string())
898        );
899        assert_eq!(
900            HeadingUtils::get_heading_text("###   Spaces   "),
901            Some("Spaces".to_string())
902        );
903        assert_eq!(HeadingUtils::get_heading_text("Not a heading"), None);
904        assert_eq!(HeadingUtils::get_heading_text(""), None);
905    }
906
907    #[test]
908    fn test_emphasis_detection() {
909        assert!(HeadingUtils::is_emphasis_only_line("*emphasis*"));
910        assert!(HeadingUtils::is_emphasis_only_line("_emphasis_"));
911        assert!(HeadingUtils::is_emphasis_only_line("**strong**"));
912        assert!(HeadingUtils::is_emphasis_only_line("__strong__"));
913        assert!(HeadingUtils::is_emphasis_only_line("  *emphasis*  "));
914
915        assert!(!HeadingUtils::is_emphasis_only_line("*not* emphasis"));
916        assert!(!HeadingUtils::is_emphasis_only_line("text *emphasis*"));
917        assert!(!HeadingUtils::is_emphasis_only_line("**"));
918        assert!(!HeadingUtils::is_emphasis_only_line(""));
919    }
920
921    #[test]
922    fn test_extract_emphasis_text() {
923        assert_eq!(
924            HeadingUtils::extract_emphasis_text("*text*"),
925            Some(("text".to_string(), 1))
926        );
927        assert_eq!(
928            HeadingUtils::extract_emphasis_text("_text_"),
929            Some(("text".to_string(), 1))
930        );
931        assert_eq!(
932            HeadingUtils::extract_emphasis_text("**text**"),
933            Some(("text".to_string(), 2))
934        );
935        assert_eq!(
936            HeadingUtils::extract_emphasis_text("__text__"),
937            Some(("text".to_string(), 2))
938        );
939        assert_eq!(
940            HeadingUtils::extract_emphasis_text("  *spaced*  "),
941            Some(("spaced".to_string(), 1))
942        );
943
944        assert_eq!(HeadingUtils::extract_emphasis_text("not emphasis"), None);
945        assert_eq!(HeadingUtils::extract_emphasis_text("*not* complete"), None);
946    }
947
948    #[test]
949    fn test_convert_emphasis_to_heading() {
950        assert_eq!(
951            HeadingUtils::convert_emphasis_to_heading("*text*"),
952            Some("# text".to_string())
953        );
954        assert_eq!(
955            HeadingUtils::convert_emphasis_to_heading("**text**"),
956            Some("## text".to_string())
957        );
958        assert_eq!(
959            HeadingUtils::convert_emphasis_to_heading("  *text*"),
960            Some("  # text".to_string())
961        );
962        assert_eq!(
963            HeadingUtils::convert_emphasis_to_heading("*text* "),
964            Some("# text ".to_string())
965        );
966
967        assert_eq!(HeadingUtils::convert_emphasis_to_heading("not emphasis"), None);
968    }
969
970    #[test]
971    fn test_heading_to_fragment() {
972        assert_eq!(HeadingUtils::heading_to_fragment("Simple Heading"), "simple-heading");
973        assert_eq!(
974            HeadingUtils::heading_to_fragment("Heading with Numbers 123"),
975            "heading-with-numbers-123"
976        );
977        assert_eq!(
978            HeadingUtils::heading_to_fragment("Special!@#$%Characters"),
979            "special-characters"
980        );
981        assert_eq!(HeadingUtils::heading_to_fragment("  Trimmed  "), "trimmed");
982        assert_eq!(
983            HeadingUtils::heading_to_fragment("Multiple   Spaces"),
984            "multiple-spaces"
985        );
986        assert_eq!(
987            HeadingUtils::heading_to_fragment("Heading <em>with HTML</em>"),
988            "heading-with-html"
989        );
990        assert_eq!(
991            HeadingUtils::heading_to_fragment("---Leading-Dashes---"),
992            "leading-dashes"
993        );
994        assert_eq!(HeadingUtils::heading_to_fragment(""), "");
995    }
996
997    #[test]
998    fn test_is_in_front_matter() {
999        let content = "---\ntitle: Test\n---\n# Content";
1000        assert!(HeadingUtils::is_in_front_matter(content, 1));
1001        assert!(!HeadingUtils::is_in_front_matter(content, 2)); // Closing delimiter is not considered in front matter
1002        assert!(!HeadingUtils::is_in_front_matter(content, 3));
1003        assert!(!HeadingUtils::is_in_front_matter(content, 4));
1004
1005        // No front matter
1006        let content = "# Just content";
1007        assert!(!HeadingUtils::is_in_front_matter(content, 0));
1008
1009        // Unclosed front matter
1010        let content = "---\ntitle: Test\n# No closing";
1011        assert!(HeadingUtils::is_in_front_matter(content, 1));
1012        assert!(HeadingUtils::is_in_front_matter(content, 2)); // Still in unclosed front matter
1013
1014        // Front matter not at start
1015        let content = "# Heading\n---\ntitle: Test\n---";
1016        assert!(!HeadingUtils::is_in_front_matter(content, 2));
1017    }
1018
1019    #[test]
1020    fn test_module_level_functions() {
1021        // Test is_heading
1022        assert!(is_heading("# Heading"));
1023        assert!(is_heading("  ## Indented"));
1024        assert!(!is_heading("Not a heading"));
1025        assert!(!is_heading(""));
1026
1027        // Test is_setext_heading_marker
1028        assert!(is_setext_heading_marker("========"));
1029        assert!(is_setext_heading_marker("--------"));
1030        assert!(is_setext_heading_marker("  ======"));
1031        assert!(!is_setext_heading_marker("# Heading"));
1032        assert!(is_setext_heading_marker("---")); // Three dashes is valid
1033
1034        // Test is_setext_heading
1035        let lines = vec!["Title", "====="];
1036        assert!(is_setext_heading(&lines, 0));
1037
1038        let lines = vec!["", "====="];
1039        assert!(!is_setext_heading(&lines, 0));
1040
1041        // Test get_heading_level
1042        let lines = vec!["# H1", "## H2", "### H3"];
1043        assert_eq!(get_heading_level(&lines, 0), 1);
1044        assert_eq!(get_heading_level(&lines, 1), 2);
1045        assert_eq!(get_heading_level(&lines, 2), 3);
1046        assert_eq!(get_heading_level(&lines, 10), 0);
1047
1048        // Test extract_heading_text
1049        let lines = vec!["# Heading Text", "## Another ###"];
1050        assert_eq!(extract_heading_text(&lines, 0), "Heading Text");
1051        assert_eq!(extract_heading_text(&lines, 1), "Another");
1052
1053        // Test get_heading_indentation
1054        let lines = vec!["# No indent", "  ## Two spaces", "    ### Four spaces"];
1055        assert_eq!(get_heading_indentation(&lines, 0), 0);
1056        assert_eq!(get_heading_indentation(&lines, 1), 2);
1057        assert_eq!(get_heading_indentation(&lines, 2), 4);
1058    }
1059
1060    #[test]
1061    fn test_is_code_block_delimiter() {
1062        assert!(is_code_block_delimiter("```"));
1063        assert!(is_code_block_delimiter("~~~"));
1064        assert!(is_code_block_delimiter("````"));
1065        assert!(is_code_block_delimiter("```rust"));
1066        assert!(is_code_block_delimiter("  ```"));
1067
1068        assert!(!is_code_block_delimiter("``")); // Too short
1069        assert!(!is_code_block_delimiter("# Heading"));
1070    }
1071
1072    #[test]
1073    fn test_is_front_matter_delimiter() {
1074        assert!(is_front_matter_delimiter("---"));
1075        assert!(is_front_matter_delimiter("---  "));
1076
1077        assert!(!is_front_matter_delimiter("----"));
1078        assert!(!is_front_matter_delimiter("--"));
1079        assert!(!is_front_matter_delimiter("# ---"));
1080    }
1081
1082    #[test]
1083    fn test_remove_trailing_hashes() {
1084        assert_eq!(remove_trailing_hashes("Heading ###"), "Heading");
1085        assert_eq!(remove_trailing_hashes("Heading ## "), "Heading");
1086        assert_eq!(remove_trailing_hashes("Heading #not trailing"), "Heading #not trailing");
1087        assert_eq!(remove_trailing_hashes("No hashes"), "No hashes");
1088        assert_eq!(remove_trailing_hashes(""), "");
1089
1090        // Test the specific case that was failing
1091        assert_eq!(remove_trailing_hashes("Heading ##"), "Heading");
1092        assert_eq!(remove_trailing_hashes("Heading #"), "Heading");
1093        assert_eq!(remove_trailing_hashes("Heading ####"), "Heading");
1094
1095        // Edge cases
1096        assert_eq!(remove_trailing_hashes("#"), "");
1097        assert_eq!(remove_trailing_hashes("##"), "");
1098        assert_eq!(remove_trailing_hashes("###"), "");
1099        assert_eq!(remove_trailing_hashes("Text#"), "Text");
1100        assert_eq!(remove_trailing_hashes("Text ##"), "Text");
1101    }
1102
1103    #[test]
1104    fn test_normalize_heading() {
1105        assert_eq!(normalize_heading("# Old Level", 3), "### Old Level");
1106        assert_eq!(normalize_heading("## Heading ##", 1), "# Heading");
1107        assert_eq!(normalize_heading("  # Indented", 2), "  ## Indented");
1108        assert_eq!(normalize_heading("Plain text", 1), "# Plain text");
1109    }
1110
1111    #[test]
1112    fn test_heading_style_from_str() {
1113        assert_eq!(HeadingStyle::from_str("atx"), Ok(HeadingStyle::Atx));
1114        assert_eq!(HeadingStyle::from_str("ATX"), Ok(HeadingStyle::Atx));
1115        assert_eq!(HeadingStyle::from_str("atx_closed"), Ok(HeadingStyle::AtxClosed));
1116        assert_eq!(HeadingStyle::from_str("atx-closed"), Ok(HeadingStyle::AtxClosed));
1117        assert_eq!(HeadingStyle::from_str("ATX-CLOSED"), Ok(HeadingStyle::AtxClosed));
1118        assert_eq!(HeadingStyle::from_str("setext1"), Ok(HeadingStyle::Setext1));
1119        assert_eq!(HeadingStyle::from_str("setext"), Ok(HeadingStyle::Setext1));
1120        assert_eq!(HeadingStyle::from_str("setext2"), Ok(HeadingStyle::Setext2));
1121        assert_eq!(HeadingStyle::from_str("consistent"), Ok(HeadingStyle::Consistent));
1122        assert_eq!(
1123            HeadingStyle::from_str("setext_with_atx"),
1124            Ok(HeadingStyle::SetextWithAtx)
1125        );
1126        assert_eq!(
1127            HeadingStyle::from_str("setext-with-atx"),
1128            Ok(HeadingStyle::SetextWithAtx)
1129        );
1130        assert_eq!(
1131            HeadingStyle::from_str("setext_with_atx_closed"),
1132            Ok(HeadingStyle::SetextWithAtxClosed)
1133        );
1134        assert_eq!(
1135            HeadingStyle::from_str("setext-with-atx-closed"),
1136            Ok(HeadingStyle::SetextWithAtxClosed)
1137        );
1138        assert_eq!(HeadingStyle::from_str("invalid"), Err(()));
1139    }
1140
1141    #[test]
1142    fn test_heading_style_display() {
1143        assert_eq!(HeadingStyle::Atx.to_string(), "atx");
1144        assert_eq!(HeadingStyle::AtxClosed.to_string(), "atx-closed");
1145        assert_eq!(HeadingStyle::Setext1.to_string(), "setext1");
1146        assert_eq!(HeadingStyle::Setext2.to_string(), "setext2");
1147        assert_eq!(HeadingStyle::Consistent.to_string(), "consistent");
1148    }
1149
1150    #[test]
1151    fn test_unicode_headings() {
1152        let content = "# 你好世界\n## Ñoño\n### 🚀 Emoji";
1153        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().text, "你好世界");
1154        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().text, "Ñoño");
1155        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().text, "🚀 Emoji");
1156
1157        // Test fragment generation with unicode
1158        assert_eq!(HeadingUtils::heading_to_fragment("你好世界"), "你好世界");
1159        assert_eq!(HeadingUtils::heading_to_fragment("Café René"), "café-rené");
1160    }
1161
1162    #[test]
1163    fn test_complex_nested_structures() {
1164        // Code block inside front matter (edge case)
1165        // The function doesn't handle YAML multi-line strings, so ``` inside front matter
1166        // is treated as a code block start
1167        let content = "---\ncode: |\n  ```\n  # Not a heading\n  ```\n---\n# Real heading";
1168        assert!(HeadingUtils::is_in_code_block(content, 4)); // Inside code block
1169        assert!(HeadingUtils::parse_heading(content, 7).is_some());
1170
1171        // Multiple code blocks
1172        let content = "```\ncode\n```\n# Heading\n~~~\nmore code\n~~~";
1173        assert!(!HeadingUtils::is_in_code_block(content, 4));
1174        assert!(HeadingUtils::parse_heading(content, 4).is_some());
1175    }
1176}