rumdl_lib/rules/
heading_utils.rs

1use crate::utils::regex_cache::get_cached_regex;
2use std::fmt;
3use std::str::FromStr;
4
// Regex patterns

// ATX heading. Capture groups: 1 = leading whitespace, 2 = the one to six opening `#`,
// 3 = whitespace after the opening hashes, 4 = heading text (contains no `#`),
// 5 = optional closing run of one to six `#` (must be preceded by whitespace).
const ATX_PATTERN_STR: &str = r"^(\s*)(#{1,6})(\s*)([^#\n]*?)(?:\s+(#{1,6}))?\s*$";
// Setext level-1 underline: group 1 = leading whitespace, group 2 = a run of `=`.
const SETEXT_HEADING_1_STR: &str = r"^(\s*)(=+)(\s*)$";
// Setext level-2 underline: group 1 = leading whitespace, group 2 = a run of `-`.
const SETEXT_HEADING_2_STR: &str = r"^(\s*)(-+)(\s*)$";
// Three or more backticks or tildes, optionally followed by arbitrary text.
const FENCED_CODE_BLOCK_START_STR: &str = r"^(\s*)(`{3,}|~{3,}).*$";
// Three or more backticks or tildes followed only by whitespace.
const FENCED_CODE_BLOCK_END_STR: &str = r"^(\s*)(`{3,}|~{3,})\s*$";
// Exactly `---` at the start of the line, followed only by whitespace.
const FRONT_MATTER_DELIMITER_STR: &str = r"^---\s*$";
// Anything from a `<` up to the next `>`.
const HTML_TAG_REGEX_STR: &str = r"<[^>]*>";

// Single line emphasis patterns
// Each matches a line consisting solely of one emphasized span (surrounding
// whitespace allowed); capture group 1 is the text between the delimiters.
const SINGLE_LINE_ASTERISK_EMPHASIS_STR: &str = r"^\s*\*([^*\n]+)\*\s*$";
const SINGLE_LINE_UNDERSCORE_EMPHASIS_STR: &str = r"^\s*_([^_\n]+)_\s*$";
const SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR: &str = r"^\s*\*\*([^*\n]+)\*\*\s*$";
const SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR: &str = r"^\s*__([^_\n]+)__\s*$";
19
/// The heading syntaxes a Markdown heading can be written in or converted to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// `# Heading`
    Atx,
    /// `# Heading #`
    AtxClosed,
    /// Heading text underlined with `=======`
    Setext1,
    /// Heading text underlined with `-------`
    Setext2,
    /// For maintaining consistency with the first found header style
    Consistent,
    /// Setext for h1/h2, ATX for h3-h6
    SetextWithAtx,
    /// Setext for h1/h2, ATX closed for h3-h6
    SetextWithAtxClosed,
}
33
34impl fmt::Display for HeadingStyle {
35    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36        let s = match self {
37            HeadingStyle::Atx => "atx",
38            HeadingStyle::AtxClosed => "atx_closed",
39            HeadingStyle::Setext1 => "setext1",
40            HeadingStyle::Setext2 => "setext2",
41            HeadingStyle::Consistent => "consistent",
42            HeadingStyle::SetextWithAtx => "setext_with_atx",
43            HeadingStyle::SetextWithAtxClosed => "setext_with_atx_closed",
44        };
45        write!(f, "{s}")
46    }
47}
48
49impl FromStr for HeadingStyle {
50    type Err = ();
51    fn from_str(s: &str) -> Result<Self, Self::Err> {
52        match s.to_ascii_lowercase().as_str() {
53            "atx" => Ok(HeadingStyle::Atx),
54            "atx_closed" => Ok(HeadingStyle::AtxClosed),
55            "setext1" | "setext" => Ok(HeadingStyle::Setext1),
56            "setext2" => Ok(HeadingStyle::Setext2),
57            "consistent" => Ok(HeadingStyle::Consistent),
58            "setext_with_atx" => Ok(HeadingStyle::SetextWithAtx),
59            "setext_with_atx_closed" => Ok(HeadingStyle::SetextWithAtxClosed),
60            _ => Err(()),
61        }
62    }
63}
64
/// Represents a heading in a Markdown document
#[derive(Debug, Clone, PartialEq)]
pub struct Heading {
    /// Heading text without the heading markers or leading indentation
    pub text: String,
    /// Heading level: the number of opening `#` for ATX headings; 1 (`=` underline)
    /// or 2 (`-` underline) for setext headings
    pub level: u32,
    /// Syntax the heading was written in
    pub style: HeadingStyle,
    /// 1-based line number of the heading line (the text line for setext headings)
    pub line_number: usize,
    /// Raw source of the heading; for setext headings this is the text line and
    /// the underline joined by `\n`
    pub original_text: String,
    /// Leading whitespace of the heading line
    pub indentation: String,
}
75
/// Utility functions for working with Markdown headings.
///
/// This is a unit struct used purely as a namespace: its helpers are associated
/// functions that take no `self`.
pub struct HeadingUtils;
78
79impl HeadingUtils {
80    /// Check if a line is an ATX heading (starts with #)
81    pub fn is_atx_heading(line: &str) -> bool {
82        get_cached_regex(ATX_PATTERN_STR)
83            .map(|re| re.is_match(line))
84            .unwrap_or(false)
85    }
86
87    /// Check if a line is inside a code block
88    pub fn is_in_code_block(content: &str, line_number: usize) -> bool {
89        let mut in_code_block = false;
90        let mut fence_char = None;
91        let mut line_count = 0;
92
93        for line in content.lines() {
94            line_count += 1;
95            if line_count > line_number {
96                break;
97            }
98
99            let trimmed = line.trim();
100            if trimmed.len() >= 3 {
101                let first_chars: Vec<char> = trimmed.chars().take(3).collect();
102                if first_chars.iter().all(|&c| c == '`' || c == '~') {
103                    if let Some(current_fence) = fence_char {
104                        if first_chars[0] == current_fence && first_chars.iter().all(|&c| c == current_fence) {
105                            in_code_block = false;
106                            fence_char = None;
107                        }
108                    } else {
109                        in_code_block = true;
110                        fence_char = Some(first_chars[0]);
111                    }
112                }
113            }
114        }
115
116        in_code_block
117    }
118
119    /// Parse a line into a Heading struct if it's a valid heading
120    pub fn parse_heading(content: &str, line_num: usize) -> Option<Heading> {
121        let lines: Vec<&str> = content.lines().collect();
122        if line_num == 0 || line_num > lines.len() {
123            return None;
124        }
125
126        let line = lines[line_num - 1];
127
128        // Skip if line is within a code block
129        if Self::is_in_code_block(content, line_num) {
130            return None;
131        }
132
133        // Check for ATX style headings
134        if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
135            let indentation = captures.get(1).map_or("", |m| m.as_str()).to_string();
136            let opening_hashes = captures.get(2).map_or("", |m| m.as_str());
137            let level = opening_hashes.len() as u32;
138            let text = captures.get(4).map_or("", |m| m.as_str()).to_string();
139
140            let style = if let Some(closing) = captures.get(5) {
141                let closing_hashes = closing.as_str();
142                if closing_hashes.len() == opening_hashes.len() {
143                    HeadingStyle::AtxClosed
144                } else {
145                    HeadingStyle::Atx
146                }
147            } else {
148                HeadingStyle::Atx
149            };
150
151            let heading = Heading {
152                text: text.clone(),
153                level,
154                style,
155                line_number: line_num,
156                original_text: line.to_string(),
157                indentation: indentation.clone(),
158            };
159            return Some(heading);
160        }
161
162        // Check for Setext style headings
163        if line_num < lines.len() {
164            let next_line = lines[line_num];
165            let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
166
167            // Skip empty lines - don't consider them as potential Setext headings
168            if line.trim().is_empty() {
169                return None;
170            }
171
172            // Skip list items - they shouldn't be considered as potential Setext headings
173            if line.trim_start().starts_with('-')
174                || line.trim_start().starts_with('*')
175                || line.trim_start().starts_with('+')
176                || line.trim_start().starts_with("1.")
177            {
178                return None;
179            }
180
181            // Skip front matter delimiters or lines within front matter
182            if line.trim() == "---" || Self::is_in_front_matter(content, line_num - 1) {
183                return None;
184            }
185
186            if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
187                .ok()
188                .and_then(|re| re.captures(next_line))
189            {
190                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
191                if underline_indent == line_indentation {
192                    let heading = Heading {
193                        text: line[line_indentation.len()..].to_string(),
194                        level: 1,
195                        style: HeadingStyle::Setext1,
196                        line_number: line_num,
197                        original_text: format!("{line}\n{next_line}"),
198                        indentation: line_indentation.clone(),
199                    };
200                    return Some(heading);
201                }
202            } else if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
203                .ok()
204                .and_then(|re| re.captures(next_line))
205            {
206                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
207                if underline_indent == line_indentation {
208                    let heading = Heading {
209                        text: line[line_indentation.len()..].to_string(),
210                        level: 2,
211                        style: HeadingStyle::Setext2,
212                        line_number: line_num,
213                        original_text: format!("{line}\n{next_line}"),
214                        indentation: line_indentation.clone(),
215                    };
216                    return Some(heading);
217                }
218            }
219        }
220
221        None
222    }
223
224    /// Get the indentation level of a line
225    pub fn get_indentation(line: &str) -> usize {
226        line.len() - line.trim_start().len()
227    }
228
229    /// Convert a heading to a different style
230    pub fn convert_heading_style(text_content: &str, level: u32, style: HeadingStyle) -> String {
231        if text_content.trim().is_empty() {
232            return String::new();
233        }
234
235        // Validate heading level
236        let level = level.clamp(1, 6);
237        let indentation = text_content
238            .chars()
239            .take_while(|c| c.is_whitespace())
240            .collect::<String>();
241        let text_content = text_content.trim();
242
243        match style {
244            HeadingStyle::Atx => {
245                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
246            }
247            HeadingStyle::AtxClosed => {
248                format!(
249                    "{}{} {} {}",
250                    indentation,
251                    "#".repeat(level as usize),
252                    text_content,
253                    "#".repeat(level as usize)
254                )
255            }
256            HeadingStyle::Setext1 | HeadingStyle::Setext2 => {
257                if level > 2 {
258                    // Fall back to ATX style for levels > 2
259                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
260                } else {
261                    let underline_char = if level == 1 || style == HeadingStyle::Setext1 {
262                        '='
263                    } else {
264                        '-'
265                    };
266                    let visible_length = text_content.chars().count();
267                    let underline_length = visible_length.max(1); // Ensure at least 1 underline char
268                    format!(
269                        "{}{}\n{}{}",
270                        indentation,
271                        text_content,
272                        indentation,
273                        underline_char.to_string().repeat(underline_length)
274                    )
275                }
276            }
277            HeadingStyle::Consistent => {
278                // For Consistent style, default to ATX as it's the most commonly used
279                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
280            }
281            HeadingStyle::SetextWithAtx => {
282                if level <= 2 {
283                    // Use Setext for h1/h2
284                    let underline_char = if level == 1 { '=' } else { '-' };
285                    let visible_length = text_content.chars().count();
286                    let underline_length = visible_length.max(1);
287                    format!(
288                        "{}{}\n{}{}",
289                        indentation,
290                        text_content,
291                        indentation,
292                        underline_char.to_string().repeat(underline_length)
293                    )
294                } else {
295                    // Use ATX for h3-h6
296                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
297                }
298            }
299            HeadingStyle::SetextWithAtxClosed => {
300                if level <= 2 {
301                    // Use Setext for h1/h2
302                    let underline_char = if level == 1 { '=' } else { '-' };
303                    let visible_length = text_content.chars().count();
304                    let underline_length = visible_length.max(1);
305                    format!(
306                        "{}{}\n{}{}",
307                        indentation,
308                        text_content,
309                        indentation,
310                        underline_char.to_string().repeat(underline_length)
311                    )
312                } else {
313                    // Use ATX closed for h3-h6
314                    format!(
315                        "{}{} {} {}",
316                        indentation,
317                        "#".repeat(level as usize),
318                        text_content,
319                        "#".repeat(level as usize)
320                    )
321                }
322            }
323        }
324    }
325
326    /// Get the text content of a heading line
327    pub fn get_heading_text(line: &str) -> Option<String> {
328        get_cached_regex(ATX_PATTERN_STR)
329            .ok()
330            .and_then(|re| re.captures(line))
331            .map(|captures| captures.get(4).map_or("", |m| m.as_str()).trim().to_string())
332    }
333
334    /// Detect emphasis-only lines
335    pub fn is_emphasis_only_line(line: &str) -> bool {
336        let trimmed = line.trim();
337        get_cached_regex(SINGLE_LINE_ASTERISK_EMPHASIS_STR)
338            .map(|re| re.is_match(trimmed))
339            .unwrap_or(false)
340            || get_cached_regex(SINGLE_LINE_UNDERSCORE_EMPHASIS_STR)
341                .map(|re| re.is_match(trimmed))
342                .unwrap_or(false)
343            || get_cached_regex(SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR)
344                .map(|re| re.is_match(trimmed))
345                .unwrap_or(false)
346            || get_cached_regex(SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR)
347                .map(|re| re.is_match(trimmed))
348                .unwrap_or(false)
349    }
350
351    /// Extract text from an emphasis-only line
352    pub fn extract_emphasis_text(line: &str) -> Option<(String, u32)> {
353        let trimmed = line.trim();
354
355        if let Some(caps) = get_cached_regex(SINGLE_LINE_ASTERISK_EMPHASIS_STR)
356            .ok()
357            .and_then(|re| re.captures(trimmed))
358        {
359            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
360        }
361
362        if let Some(caps) = get_cached_regex(SINGLE_LINE_UNDERSCORE_EMPHASIS_STR)
363            .ok()
364            .and_then(|re| re.captures(trimmed))
365        {
366            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
367        }
368
369        if let Some(caps) = get_cached_regex(SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR)
370            .ok()
371            .and_then(|re| re.captures(trimmed))
372        {
373            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
374        }
375
376        if let Some(caps) = get_cached_regex(SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR)
377            .ok()
378            .and_then(|re| re.captures(trimmed))
379        {
380            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
381        }
382
383        None
384    }
385
386    /// Convert emphasis to heading
387    pub fn convert_emphasis_to_heading(line: &str) -> Option<String> {
388        // Preserve the original indentation
389        let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
390        // Preserve trailing spaces at the end of the line
391        let trailing = if line.ends_with(" ") {
392            line.chars().rev().take_while(|c| c.is_whitespace()).collect::<String>()
393        } else {
394            String::new()
395        };
396
397        if let Some((text, level)) = Self::extract_emphasis_text(line) {
398            // Preserve the original indentation and trailing spaces
399            Some(format!(
400                "{}{} {}{}",
401                indentation,
402                "#".repeat(level as usize),
403                text,
404                trailing
405            ))
406        } else {
407            None
408        }
409    }
410
411    /// Convert a heading text to a valid ID for fragment links
412    pub fn heading_to_fragment(text: &str) -> String {
413        // Remove any HTML tags
414        let text_no_html = get_cached_regex(HTML_TAG_REGEX_STR)
415            .map(|re| re.replace_all(text, ""))
416            .unwrap_or_else(|_| text.into());
417
418        // Convert to lowercase and trim
419        let text_lower = text_no_html.trim().to_lowercase();
420
421        // Replace spaces and punctuation with hyphens
422        let text_with_hyphens = text_lower
423            .chars()
424            .map(|c| if c.is_alphanumeric() { c } else { '-' })
425            .collect::<String>();
426
427        // Replace multiple consecutive hyphens with a single hyphen
428        let text_clean = text_with_hyphens
429            .split('-')
430            .filter(|s| !s.is_empty())
431            .collect::<Vec<_>>()
432            .join("-");
433
434        // Remove leading and trailing hyphens
435        text_clean.trim_matches('-').to_string()
436    }
437
438    /// Check if a line is in front matter
439    pub fn is_in_front_matter(content: &str, line_number: usize) -> bool {
440        let lines: Vec<&str> = content.lines().collect();
441        if lines.is_empty() || line_number >= lines.len() {
442            return false;
443        }
444
445        // Check if the document starts with front matter
446        if !lines[0].trim_start().eq("---") {
447            return false;
448        }
449
450        let mut in_front_matter = true;
451        let mut found_closing = false;
452
453        // Skip the first line (opening delimiter)
454        for (i, line) in lines.iter().enumerate().skip(1) {
455            if i > line_number {
456                break;
457            }
458
459            if line.trim_start().eq("---") {
460                found_closing = true;
461                in_front_matter = i > line_number;
462                break;
463            }
464        }
465
466        in_front_matter && !found_closing
467    }
468}
469
470/// Checks if a line is a heading
471#[inline]
472pub fn is_heading(line: &str) -> bool {
473    // Fast path checks first
474    let trimmed = line.trim();
475    if trimmed.is_empty() {
476        return false;
477    }
478
479    if trimmed.starts_with('#') {
480        // Check for ATX heading
481        get_cached_regex(ATX_PATTERN_STR)
482            .map(|re| re.is_match(line))
483            .unwrap_or(false)
484    } else {
485        // We can't tell for setext headings without looking at the next line
486        false
487    }
488}
489
490/// Checks if a line is a setext heading marker
491#[inline]
492pub fn is_setext_heading_marker(line: &str) -> bool {
493    get_cached_regex(SETEXT_HEADING_1_STR)
494        .map(|re| re.is_match(line))
495        .unwrap_or(false)
496        || get_cached_regex(SETEXT_HEADING_2_STR)
497            .map(|re| re.is_match(line))
498            .unwrap_or(false)
499}
500
501/// Checks if a line is a setext heading by examining its next line
502#[inline]
503pub fn is_setext_heading(lines: &[&str], index: usize) -> bool {
504    if index >= lines.len() - 1 {
505        return false;
506    }
507
508    let current_line = lines[index];
509    let next_line = lines[index + 1];
510
511    // Skip if current line is empty
512    if current_line.trim().is_empty() {
513        return false;
514    }
515
516    // Check if next line is a setext heading marker with same indentation
517    let current_indentation = current_line
518        .chars()
519        .take_while(|c| c.is_whitespace())
520        .collect::<String>();
521
522    if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
523        .ok()
524        .and_then(|re| re.captures(next_line))
525    {
526        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
527        return underline_indent == current_indentation;
528    }
529
530    if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
531        .ok()
532        .and_then(|re| re.captures(next_line))
533    {
534        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
535        return underline_indent == current_indentation;
536    }
537
538    false
539}
540
541/// Get the heading level for a line
542#[inline]
543pub fn get_heading_level(lines: &[&str], index: usize) -> u32 {
544    if index >= lines.len() {
545        return 0;
546    }
547
548    let line = lines[index];
549
550    // Check for ATX style heading
551    if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
552        let hashes = captures.get(2).map_or("", |m| m.as_str());
553        return hashes.len() as u32;
554    }
555
556    // Check for setext style heading
557    if index < lines.len() - 1 {
558        let next_line = lines[index + 1];
559
560        if get_cached_regex(SETEXT_HEADING_1_STR)
561            .map(|re| re.is_match(next_line))
562            .unwrap_or(false)
563        {
564            return 1;
565        }
566
567        if get_cached_regex(SETEXT_HEADING_2_STR)
568            .map(|re| re.is_match(next_line))
569            .unwrap_or(false)
570        {
571            return 2;
572        }
573    }
574
575    0
576}
577
578/// Extract the text content from a heading
579#[inline]
580pub fn extract_heading_text(lines: &[&str], index: usize) -> String {
581    if index >= lines.len() {
582        return String::new();
583    }
584
585    let line = lines[index];
586
587    // Extract from ATX heading
588    if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
589        return captures.get(4).map_or("", |m| m.as_str()).trim().to_string();
590    }
591
592    // Extract from setext heading
593    if index < lines.len() - 1 {
594        let next_line = lines[index + 1];
595        let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
596
597        if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
598            .ok()
599            .and_then(|re| re.captures(next_line))
600        {
601            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
602            if underline_indent == line_indentation {
603                return line[line_indentation.len()..].trim().to_string();
604            }
605        }
606
607        if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
608            .ok()
609            .and_then(|re| re.captures(next_line))
610        {
611            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
612            if underline_indent == line_indentation {
613                return line[line_indentation.len()..].trim().to_string();
614            }
615        }
616    }
617
618    line.trim().to_string()
619}
620
/// Returns the length in bytes of the leading whitespace of the line at
/// `index`, or 0 when `index` is out of range.
#[inline]
pub fn get_heading_indentation(lines: &[&str], index: usize) -> usize {
    lines
        .get(index)
        .map_or(0, |line| line.len() - line.trim_start().len())
}
631
632/// Check if a line is a code block delimiter
633#[inline]
634pub fn is_code_block_delimiter(line: &str) -> bool {
635    get_cached_regex(FENCED_CODE_BLOCK_START_STR)
636        .map(|re| re.is_match(line))
637        .unwrap_or(false)
638        || get_cached_regex(FENCED_CODE_BLOCK_END_STR)
639            .map(|re| re.is_match(line))
640            .unwrap_or(false)
641}
642
643/// Check if a line is a front matter delimiter
644#[inline]
645pub fn is_front_matter_delimiter(line: &str) -> bool {
646    get_cached_regex(FRONT_MATTER_DELIMITER_STR)
647        .map(|re| re.is_match(line))
648        .unwrap_or(false)
649}
650
/// Remove trailing hashes from a heading.
///
/// Trailing whitespace is stripped first; if the remaining text ends with a
/// run of `#`, that run and any whitespace before it are removed as well.
/// Text without trailing hashes is returned with only its trailing whitespace
/// removed. Works on character boundaries, so non-ASCII text is handled.
#[inline]
pub fn remove_trailing_hashes(text: &str) -> String {
    text.trim_end()
        .trim_end_matches('#')
        // Drop the whitespace that separated the text from the closing hashes.
        .trim_end()
        .to_string()
}
682
683/// Normalize a heading to the specified level
684#[inline]
685pub fn normalize_heading(line: &str, level: u32) -> String {
686    let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
687    let trimmed = line.trim_start();
688
689    if trimmed.starts_with('#') {
690        if let Some(text) = HeadingUtils::get_heading_text(line) {
691            format!("{}{} {}", indentation, "#".repeat(level as usize), text)
692        } else {
693            line.to_string()
694        }
695    } else {
696        format!("{}{} {}", indentation, "#".repeat(level as usize), trimmed)
697    }
698}
699
700#[cfg(test)]
701mod tests {
702    use super::*;
703
704    #[test]
705    fn test_atx_heading_parsing() {
706        let content = "# Heading 1\n## Heading 2\n### Heading 3";
707        assert!(HeadingUtils::parse_heading(content, 1).is_some());
708        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
709        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().level, 2);
710        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 3);
711    }
712
713    #[test]
714    fn test_setext_heading_parsing() {
715        let content = "Heading 1\n=========\nHeading 2\n---------";
716        assert!(HeadingUtils::parse_heading(content, 1).is_some());
717        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
718        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 2);
719    }
720
721    #[test]
722    fn test_heading_style_conversion() {
723        assert_eq!(
724            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Atx),
725            "# Heading 1"
726        );
727        assert_eq!(
728            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::AtxClosed),
729            "## Heading 2 ##"
730        );
731        assert_eq!(
732            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Setext1),
733            "Heading 1\n========="
734        );
735        assert_eq!(
736            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::Setext2),
737            "Heading 2\n---------"
738        );
739    }
740
741    #[test]
742    fn test_code_block_detection() {
743        let content = "# Heading\n```\n# Not a heading\n```\n# Another heading";
744        assert!(!HeadingUtils::is_in_code_block(content, 0));
745        assert!(HeadingUtils::is_in_code_block(content, 2));
746        assert!(!HeadingUtils::is_in_code_block(content, 4));
747    }
748
749    #[test]
750    fn test_empty_line_with_dashes() {
751        // Test that an empty line followed by dashes is not considered a heading
752        let content = "\n---";
753
754        // Empty line is at index 0, dashes at index 1
755        assert_eq!(
756            HeadingUtils::parse_heading(content, 1),
757            None,
758            "Empty line followed by dashes should not be detected as a heading"
759        );
760
761        // Also test with a regular horizontal rule
762        let content2 = "Some content\n\n---\nMore content";
763        assert_eq!(
764            HeadingUtils::parse_heading(content2, 2),
765            None,
766            "Empty line followed by horizontal rule should not be detected as a heading"
767        );
768    }
769
770    #[test]
771    fn test_is_atx_heading() {
772        assert!(HeadingUtils::is_atx_heading("# Heading"));
773        assert!(HeadingUtils::is_atx_heading("## Heading"));
774        assert!(HeadingUtils::is_atx_heading("### Heading"));
775        assert!(HeadingUtils::is_atx_heading("#### Heading"));
776        assert!(HeadingUtils::is_atx_heading("##### Heading"));
777        assert!(HeadingUtils::is_atx_heading("###### Heading"));
778        assert!(HeadingUtils::is_atx_heading("  # Indented"));
779        assert!(HeadingUtils::is_atx_heading("# Heading #"));
780        assert!(HeadingUtils::is_atx_heading("## Heading ###"));
781
782        assert!(!HeadingUtils::is_atx_heading("####### Too many"));
783        assert!(!HeadingUtils::is_atx_heading("Not a heading"));
784        assert!(HeadingUtils::is_atx_heading("#")); // Single # is a valid heading
785        assert!(!HeadingUtils::is_atx_heading(""));
786    }
787
788    #[test]
789    fn test_heading_edge_cases() {
790        // Test invalid line numbers
791        let content = "# Heading";
792        assert!(HeadingUtils::parse_heading(content, 0).is_none());
793        assert!(HeadingUtils::parse_heading(content, 10).is_none());
794
795        // Test headings in code blocks
796        let content = "```\n# Not a heading\n```";
797        assert!(HeadingUtils::parse_heading(content, 2).is_none());
798
799        // Test with tildes for code blocks
800        let content = "~~~\n# Not a heading\n~~~";
801        assert!(HeadingUtils::is_in_code_block(content, 2));
802
803        // Test mixed fence characters
804        let content = "```\n# Content\n~~~"; // Mismatched fences
805        assert!(HeadingUtils::is_in_code_block(content, 2));
806    }
807
808    #[test]
809    fn test_atx_closed_heading_variations() {
810        let content = "# Heading #\n## Heading ##\n### Heading ####\n#### Heading ##";
811        let h1 = HeadingUtils::parse_heading(content, 1).unwrap();
812        assert_eq!(h1.style, HeadingStyle::AtxClosed);
813        assert_eq!(h1.text, "Heading");
814
815        let h2 = HeadingUtils::parse_heading(content, 2).unwrap();
816        assert_eq!(h2.style, HeadingStyle::AtxClosed);
817
818        // Mismatched closing hashes - still ATX but not closed
819        let h3 = HeadingUtils::parse_heading(content, 3).unwrap();
820        assert_eq!(h3.style, HeadingStyle::Atx);
821
822        let h4 = HeadingUtils::parse_heading(content, 4).unwrap();
823        assert_eq!(h4.style, HeadingStyle::Atx);
824    }
825
826    #[test]
827    fn test_setext_heading_edge_cases() {
828        // List item followed by dashes should not be a heading
829        let content = "- List item\n---------";
830        assert!(HeadingUtils::parse_heading(content, 1).is_none());
831
832        // Front matter should not be a heading
833        let content = "---\ntitle: test\n---";
834        assert!(HeadingUtils::parse_heading(content, 1).is_none());
835
836        // Indented setext headings
837        let content = "  Indented\n  ========";
838        let heading = HeadingUtils::parse_heading(content, 1).unwrap();
839        assert_eq!(heading.indentation, "  ");
840        assert_eq!(heading.text, "Indented");
841
842        // Mismatched indentation should not be a heading
843        let content = "  Text\n========"; // No indent on underline
844        assert!(HeadingUtils::parse_heading(content, 1).is_none());
845    }
846
847    #[test]
848    fn test_get_indentation() {
849        assert_eq!(HeadingUtils::get_indentation("# Heading"), 0);
850        assert_eq!(HeadingUtils::get_indentation("  # Heading"), 2);
851        assert_eq!(HeadingUtils::get_indentation("    # Heading"), 4);
852        assert_eq!(HeadingUtils::get_indentation("\t# Heading"), 1);
853        assert_eq!(HeadingUtils::get_indentation(""), 0);
854    }
855
856    #[test]
857    fn test_convert_heading_style_edge_cases() {
858        // Empty text
859        assert_eq!(HeadingUtils::convert_heading_style("", 1, HeadingStyle::Atx), "");
860        assert_eq!(HeadingUtils::convert_heading_style("   ", 1, HeadingStyle::Atx), "");
861
862        // Level clamping
863        assert_eq!(
864            HeadingUtils::convert_heading_style("Text", 0, HeadingStyle::Atx),
865            "# Text"
866        );
867        assert_eq!(
868            HeadingUtils::convert_heading_style("Text", 10, HeadingStyle::Atx),
869            "###### Text"
870        );
871
872        // Setext with level > 2 falls back to ATX
873        assert_eq!(
874            HeadingUtils::convert_heading_style("Text", 3, HeadingStyle::Setext1),
875            "### Text"
876        );
877
878        // Preserve indentation
879        assert_eq!(
880            HeadingUtils::convert_heading_style("  Text", 1, HeadingStyle::Atx),
881            "  # Text"
882        );
883
884        // Very short text for setext
885        assert_eq!(
886            HeadingUtils::convert_heading_style("Hi", 1, HeadingStyle::Setext1),
887            "Hi\n=="
888        );
889    }
890
891    #[test]
892    fn test_get_heading_text() {
893        assert_eq!(HeadingUtils::get_heading_text("# Heading"), Some("Heading".to_string()));
894        assert_eq!(
895            HeadingUtils::get_heading_text("## Heading ##"),
896            Some("Heading".to_string())
897        );
898        assert_eq!(
899            HeadingUtils::get_heading_text("###   Spaces   "),
900            Some("Spaces".to_string())
901        );
902        assert_eq!(HeadingUtils::get_heading_text("Not a heading"), None);
903        assert_eq!(HeadingUtils::get_heading_text(""), None);
904    }
905
906    #[test]
907    fn test_emphasis_detection() {
908        assert!(HeadingUtils::is_emphasis_only_line("*emphasis*"));
909        assert!(HeadingUtils::is_emphasis_only_line("_emphasis_"));
910        assert!(HeadingUtils::is_emphasis_only_line("**strong**"));
911        assert!(HeadingUtils::is_emphasis_only_line("__strong__"));
912        assert!(HeadingUtils::is_emphasis_only_line("  *emphasis*  "));
913
914        assert!(!HeadingUtils::is_emphasis_only_line("*not* emphasis"));
915        assert!(!HeadingUtils::is_emphasis_only_line("text *emphasis*"));
916        assert!(!HeadingUtils::is_emphasis_only_line("**"));
917        assert!(!HeadingUtils::is_emphasis_only_line(""));
918    }
919
920    #[test]
921    fn test_extract_emphasis_text() {
922        assert_eq!(
923            HeadingUtils::extract_emphasis_text("*text*"),
924            Some(("text".to_string(), 1))
925        );
926        assert_eq!(
927            HeadingUtils::extract_emphasis_text("_text_"),
928            Some(("text".to_string(), 1))
929        );
930        assert_eq!(
931            HeadingUtils::extract_emphasis_text("**text**"),
932            Some(("text".to_string(), 2))
933        );
934        assert_eq!(
935            HeadingUtils::extract_emphasis_text("__text__"),
936            Some(("text".to_string(), 2))
937        );
938        assert_eq!(
939            HeadingUtils::extract_emphasis_text("  *spaced*  "),
940            Some(("spaced".to_string(), 1))
941        );
942
943        assert_eq!(HeadingUtils::extract_emphasis_text("not emphasis"), None);
944        assert_eq!(HeadingUtils::extract_emphasis_text("*not* complete"), None);
945    }
946
947    #[test]
948    fn test_convert_emphasis_to_heading() {
949        assert_eq!(
950            HeadingUtils::convert_emphasis_to_heading("*text*"),
951            Some("# text".to_string())
952        );
953        assert_eq!(
954            HeadingUtils::convert_emphasis_to_heading("**text**"),
955            Some("## text".to_string())
956        );
957        assert_eq!(
958            HeadingUtils::convert_emphasis_to_heading("  *text*"),
959            Some("  # text".to_string())
960        );
961        assert_eq!(
962            HeadingUtils::convert_emphasis_to_heading("*text* "),
963            Some("# text ".to_string())
964        );
965
966        assert_eq!(HeadingUtils::convert_emphasis_to_heading("not emphasis"), None);
967    }
968
969    #[test]
970    fn test_heading_to_fragment() {
971        assert_eq!(HeadingUtils::heading_to_fragment("Simple Heading"), "simple-heading");
972        assert_eq!(
973            HeadingUtils::heading_to_fragment("Heading with Numbers 123"),
974            "heading-with-numbers-123"
975        );
976        assert_eq!(
977            HeadingUtils::heading_to_fragment("Special!@#$%Characters"),
978            "special-characters"
979        );
980        assert_eq!(HeadingUtils::heading_to_fragment("  Trimmed  "), "trimmed");
981        assert_eq!(
982            HeadingUtils::heading_to_fragment("Multiple   Spaces"),
983            "multiple-spaces"
984        );
985        assert_eq!(
986            HeadingUtils::heading_to_fragment("Heading <em>with HTML</em>"),
987            "heading-with-html"
988        );
989        assert_eq!(
990            HeadingUtils::heading_to_fragment("---Leading-Dashes---"),
991            "leading-dashes"
992        );
993        assert_eq!(HeadingUtils::heading_to_fragment(""), "");
994    }
995
996    #[test]
997    fn test_is_in_front_matter() {
998        let content = "---\ntitle: Test\n---\n# Content";
999        assert!(HeadingUtils::is_in_front_matter(content, 1));
1000        assert!(!HeadingUtils::is_in_front_matter(content, 2)); // Closing delimiter is not considered in front matter
1001        assert!(!HeadingUtils::is_in_front_matter(content, 3));
1002        assert!(!HeadingUtils::is_in_front_matter(content, 4));
1003
1004        // No front matter
1005        let content = "# Just content";
1006        assert!(!HeadingUtils::is_in_front_matter(content, 0));
1007
1008        // Unclosed front matter
1009        let content = "---\ntitle: Test\n# No closing";
1010        assert!(HeadingUtils::is_in_front_matter(content, 1));
1011        assert!(HeadingUtils::is_in_front_matter(content, 2)); // Still in unclosed front matter
1012
1013        // Front matter not at start
1014        let content = "# Heading\n---\ntitle: Test\n---";
1015        assert!(!HeadingUtils::is_in_front_matter(content, 2));
1016    }
1017
1018    #[test]
1019    fn test_module_level_functions() {
1020        // Test is_heading
1021        assert!(is_heading("# Heading"));
1022        assert!(is_heading("  ## Indented"));
1023        assert!(!is_heading("Not a heading"));
1024        assert!(!is_heading(""));
1025
1026        // Test is_setext_heading_marker
1027        assert!(is_setext_heading_marker("========"));
1028        assert!(is_setext_heading_marker("--------"));
1029        assert!(is_setext_heading_marker("  ======"));
1030        assert!(!is_setext_heading_marker("# Heading"));
1031        assert!(is_setext_heading_marker("---")); // Three dashes is valid
1032
1033        // Test is_setext_heading
1034        let lines = vec!["Title", "====="];
1035        assert!(is_setext_heading(&lines, 0));
1036
1037        let lines = vec!["", "====="];
1038        assert!(!is_setext_heading(&lines, 0));
1039
1040        // Test get_heading_level
1041        let lines = vec!["# H1", "## H2", "### H3"];
1042        assert_eq!(get_heading_level(&lines, 0), 1);
1043        assert_eq!(get_heading_level(&lines, 1), 2);
1044        assert_eq!(get_heading_level(&lines, 2), 3);
1045        assert_eq!(get_heading_level(&lines, 10), 0);
1046
1047        // Test extract_heading_text
1048        let lines = vec!["# Heading Text", "## Another ###"];
1049        assert_eq!(extract_heading_text(&lines, 0), "Heading Text");
1050        assert_eq!(extract_heading_text(&lines, 1), "Another");
1051
1052        // Test get_heading_indentation
1053        let lines = vec!["# No indent", "  ## Two spaces", "    ### Four spaces"];
1054        assert_eq!(get_heading_indentation(&lines, 0), 0);
1055        assert_eq!(get_heading_indentation(&lines, 1), 2);
1056        assert_eq!(get_heading_indentation(&lines, 2), 4);
1057    }
1058
1059    #[test]
1060    fn test_is_code_block_delimiter() {
1061        assert!(is_code_block_delimiter("```"));
1062        assert!(is_code_block_delimiter("~~~"));
1063        assert!(is_code_block_delimiter("````"));
1064        assert!(is_code_block_delimiter("```rust"));
1065        assert!(is_code_block_delimiter("  ```"));
1066
1067        assert!(!is_code_block_delimiter("``")); // Too short
1068        assert!(!is_code_block_delimiter("# Heading"));
1069    }
1070
1071    #[test]
1072    fn test_is_front_matter_delimiter() {
1073        assert!(is_front_matter_delimiter("---"));
1074        assert!(is_front_matter_delimiter("---  "));
1075
1076        assert!(!is_front_matter_delimiter("----"));
1077        assert!(!is_front_matter_delimiter("--"));
1078        assert!(!is_front_matter_delimiter("# ---"));
1079    }
1080
1081    #[test]
1082    fn test_remove_trailing_hashes() {
1083        assert_eq!(remove_trailing_hashes("Heading ###"), "Heading");
1084        assert_eq!(remove_trailing_hashes("Heading ## "), "Heading");
1085        assert_eq!(remove_trailing_hashes("Heading #not trailing"), "Heading #not trailing");
1086        assert_eq!(remove_trailing_hashes("No hashes"), "No hashes");
1087        assert_eq!(remove_trailing_hashes(""), "");
1088
1089        // Test the specific case that was failing
1090        assert_eq!(remove_trailing_hashes("Heading ##"), "Heading");
1091        assert_eq!(remove_trailing_hashes("Heading #"), "Heading");
1092        assert_eq!(remove_trailing_hashes("Heading ####"), "Heading");
1093
1094        // Edge cases
1095        assert_eq!(remove_trailing_hashes("#"), "");
1096        assert_eq!(remove_trailing_hashes("##"), "");
1097        assert_eq!(remove_trailing_hashes("###"), "");
1098        assert_eq!(remove_trailing_hashes("Text#"), "Text");
1099        assert_eq!(remove_trailing_hashes("Text ##"), "Text");
1100    }
1101
1102    #[test]
1103    fn test_normalize_heading() {
1104        assert_eq!(normalize_heading("# Old Level", 3), "### Old Level");
1105        assert_eq!(normalize_heading("## Heading ##", 1), "# Heading");
1106        assert_eq!(normalize_heading("  # Indented", 2), "  ## Indented");
1107        assert_eq!(normalize_heading("Plain text", 1), "# Plain text");
1108    }
1109
1110    #[test]
1111    fn test_heading_style_from_str() {
1112        assert_eq!(HeadingStyle::from_str("atx"), Ok(HeadingStyle::Atx));
1113        assert_eq!(HeadingStyle::from_str("ATX"), Ok(HeadingStyle::Atx));
1114        assert_eq!(HeadingStyle::from_str("atx_closed"), Ok(HeadingStyle::AtxClosed));
1115        assert_eq!(HeadingStyle::from_str("setext1"), Ok(HeadingStyle::Setext1));
1116        assert_eq!(HeadingStyle::from_str("setext"), Ok(HeadingStyle::Setext1));
1117        assert_eq!(HeadingStyle::from_str("setext2"), Ok(HeadingStyle::Setext2));
1118        assert_eq!(HeadingStyle::from_str("consistent"), Ok(HeadingStyle::Consistent));
1119        assert_eq!(HeadingStyle::from_str("invalid"), Err(()));
1120    }
1121
1122    #[test]
1123    fn test_heading_style_display() {
1124        assert_eq!(HeadingStyle::Atx.to_string(), "atx");
1125        assert_eq!(HeadingStyle::AtxClosed.to_string(), "atx_closed");
1126        assert_eq!(HeadingStyle::Setext1.to_string(), "setext1");
1127        assert_eq!(HeadingStyle::Setext2.to_string(), "setext2");
1128        assert_eq!(HeadingStyle::Consistent.to_string(), "consistent");
1129    }
1130
1131    #[test]
1132    fn test_unicode_headings() {
1133        let content = "# 你好世界\n## Ñoño\n### 🚀 Emoji";
1134        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().text, "你好世界");
1135        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().text, "Ñoño");
1136        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().text, "🚀 Emoji");
1137
1138        // Test fragment generation with unicode
1139        assert_eq!(HeadingUtils::heading_to_fragment("你好世界"), "你好世界");
1140        assert_eq!(HeadingUtils::heading_to_fragment("Café René"), "café-rené");
1141    }
1142
1143    #[test]
1144    fn test_complex_nested_structures() {
1145        // Code block inside front matter (edge case)
1146        // The function doesn't handle YAML multi-line strings, so ``` inside front matter
1147        // is treated as a code block start
1148        let content = "---\ncode: |\n  ```\n  # Not a heading\n  ```\n---\n# Real heading";
1149        assert!(HeadingUtils::is_in_code_block(content, 4)); // Inside code block
1150        assert!(HeadingUtils::parse_heading(content, 7).is_some());
1151
1152        // Multiple code blocks
1153        let content = "```\ncode\n```\n# Heading\n~~~\nmore code\n~~~";
1154        assert!(!HeadingUtils::is_in_code_block(content, 4));
1155        assert!(HeadingUtils::parse_heading(content, 4).is_some());
1156    }
1157}