Skip to main content

rumdl_lib/rules/
heading_utils.rs

1use crate::utils::regex_cache::get_cached_regex;
2use std::fmt;
3use std::str::FromStr;
4
// Regex patterns
//
// All patterns are kept as source strings and compiled on demand through
// `get_cached_regex`.

// ATX heading line. Capture groups: 1 = leading whitespace, 2 = opening run of
// 1-6 `#`, 3 = whitespace after the hashes, 4 = heading text (lazy, cannot
// contain `#`), 5 = optional closing run of 1-6 `#` preceded by whitespace.
const ATX_PATTERN_STR: &str = r"^(\s*)(#{1,6})(\s*)([^#\n]*?)(?:\s+(#{1,6}))?\s*$";
// Setext level-1 underline. Groups: 1 = indentation, 2 = run of `=`, 3 = trailing whitespace.
const SETEXT_HEADING_1_STR: &str = r"^(\s*)(=+)(\s*)$";
// Setext level-2 underline. Groups: 1 = indentation, 2 = run of `-`, 3 = trailing whitespace.
const SETEXT_HEADING_2_STR: &str = r"^(\s*)(-+)(\s*)$";
// Fence of three or more backticks or tildes, followed by anything (e.g. an info string).
const FENCED_CODE_BLOCK_START_STR: &str = r"^(\s*)(`{3,}|~{3,}).*$";
// Fence of three or more backticks or tildes, followed only by whitespace.
const FENCED_CODE_BLOCK_END_STR: &str = r"^(\s*)(`{3,}|~{3,})\s*$";
// Line consisting of exactly `---` at column 0, optionally followed by whitespace.
const FRONT_MATTER_DELIMITER_STR: &str = r"^---\s*$";
// Anything between `<` and the next `>`; used to strip tags from heading text.
const HTML_TAG_REGEX_STR: &str = r"<[^>]*>";

// Single line emphasis patterns
//
// Each pattern matches a line whose only content is one emphasised span;
// capture group 1 is the text between the markers.
const SINGLE_LINE_ASTERISK_EMPHASIS_STR: &str = r"^\s*\*([^*\n]+)\*\s*$";
const SINGLE_LINE_UNDERSCORE_EMPHASIS_STR: &str = r"^\s*_([^_\n]+)_\s*$";
const SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR: &str = r"^\s*\*\*([^*\n]+)\*\*\s*$";
const SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR: &str = r"^\s*__([^_\n]+)__\s*$";
19
/// Represents different styles of Markdown headings
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
pub enum HeadingStyle {
    /// ATX heading: `# Heading`
    Atx,
    /// Closed ATX heading: `# Heading #`
    AtxClosed,
    /// Setext heading underlined with `=`:
    /// the heading text on one line, `=======` on the next.
    Setext1,
    /// Setext heading underlined with `-`:
    /// the heading text on one line, `-------` on the next.
    Setext2,
    /// For maintaining consistency with the first found header style
    Consistent,
    /// Setext for h1/h2, ATX for h3-h6
    SetextWithAtx,
    /// Setext for h1/h2, ATX closed for h3-h6
    SetextWithAtxClosed,
}
33
34impl fmt::Display for HeadingStyle {
35    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
36        let s = match self {
37            HeadingStyle::Atx => "atx",
38            HeadingStyle::AtxClosed => "atx-closed",
39            HeadingStyle::Setext1 => "setext1",
40            HeadingStyle::Setext2 => "setext2",
41            HeadingStyle::Consistent => "consistent",
42            HeadingStyle::SetextWithAtx => "setext-with-atx",
43            HeadingStyle::SetextWithAtxClosed => "setext-with-atx-closed",
44        };
45        write!(f, "{s}")
46    }
47}
48
49impl FromStr for HeadingStyle {
50    type Err = ();
51    fn from_str(s: &str) -> Result<Self, Self::Err> {
52        let normalized = s.trim().to_ascii_lowercase().replace('-', "_");
53        match normalized.as_str() {
54            "atx" => Ok(HeadingStyle::Atx),
55            "atx_closed" => Ok(HeadingStyle::AtxClosed),
56            "setext1" | "setext" => Ok(HeadingStyle::Setext1),
57            "setext2" => Ok(HeadingStyle::Setext2),
58            "consistent" => Ok(HeadingStyle::Consistent),
59            "setext_with_atx" => Ok(HeadingStyle::SetextWithAtx),
60            "setext_with_atx_closed" => Ok(HeadingStyle::SetextWithAtxClosed),
61            _ => Err(()),
62        }
63    }
64}
65
/// Represents a heading in a Markdown document
#[derive(Debug, Clone, PartialEq)]
pub struct Heading {
    /// Heading text without the heading markers or leading indentation.
    pub text: String,
    /// Heading level: the number of opening `#` for ATX headings, 1 or 2 for
    /// setext headings.
    pub level: u32,
    /// Style the heading was written in.
    pub style: HeadingStyle,
    /// 1-based line number of the heading's text line.
    pub line_number: usize,
    /// Source text of the heading; for setext headings this is the text line
    /// and the underline joined by `\n`.
    pub original_text: String,
    /// Leading whitespace of the heading line.
    pub indentation: String,
}
76
/// Utility functions for working with Markdown headings
///
/// This is a field-less unit struct that serves only as a namespace; all of
/// its functions are associated functions called as `HeadingUtils::name(..)`.
pub struct HeadingUtils;
79
80impl HeadingUtils {
81    /// Check if a line is an ATX heading (starts with #)
82    pub fn is_atx_heading(line: &str) -> bool {
83        get_cached_regex(ATX_PATTERN_STR)
84            .map(|re| re.is_match(line))
85            .unwrap_or(false)
86    }
87
88    /// Check if a line is inside a code block
89    pub fn is_in_code_block(content: &str, line_number: usize) -> bool {
90        let mut in_code_block = false;
91        let mut fence_char = None;
92        let mut line_count = 0;
93
94        for line in content.lines() {
95            line_count += 1;
96            if line_count > line_number {
97                break;
98            }
99
100            let trimmed = line.trim();
101            if trimmed.len() >= 3 {
102                let first_chars: Vec<char> = trimmed.chars().take(3).collect();
103                if first_chars.iter().all(|&c| c == '`' || c == '~') {
104                    if let Some(current_fence) = fence_char {
105                        if first_chars[0] == current_fence && first_chars.iter().all(|&c| c == current_fence) {
106                            in_code_block = false;
107                            fence_char = None;
108                        }
109                    } else {
110                        in_code_block = true;
111                        fence_char = Some(first_chars[0]);
112                    }
113                }
114            }
115        }
116
117        in_code_block
118    }
119
120    /// Parse a line into a Heading struct if it's a valid heading
121    pub fn parse_heading(content: &str, line_num: usize) -> Option<Heading> {
122        let lines: Vec<&str> = content.lines().collect();
123        if line_num == 0 || line_num > lines.len() {
124            return None;
125        }
126
127        let line = lines[line_num - 1];
128
129        // Skip if line is within a code block
130        if Self::is_in_code_block(content, line_num) {
131            return None;
132        }
133
134        // Check for ATX style headings
135        if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
136            let indentation = captures.get(1).map_or("", |m| m.as_str()).to_string();
137            let opening_hashes = captures.get(2).map_or("", |m| m.as_str());
138            let level = opening_hashes.len() as u32;
139            let text = captures.get(4).map_or("", |m| m.as_str()).to_string();
140
141            let style = if let Some(closing) = captures.get(5) {
142                let closing_hashes = closing.as_str();
143                if closing_hashes.len() == opening_hashes.len() {
144                    HeadingStyle::AtxClosed
145                } else {
146                    HeadingStyle::Atx
147                }
148            } else {
149                HeadingStyle::Atx
150            };
151
152            let heading = Heading {
153                text: text.clone(),
154                level,
155                style,
156                line_number: line_num,
157                original_text: line.to_string(),
158                indentation: indentation.clone(),
159            };
160            return Some(heading);
161        }
162
163        // Check for Setext style headings
164        if line_num < lines.len() {
165            let next_line = lines[line_num];
166            let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
167
168            // Skip empty lines - don't consider them as potential Setext headings
169            if line.trim().is_empty() {
170                return None;
171            }
172
173            // Skip list items - they shouldn't be considered as potential Setext headings
174            if line.trim_start().starts_with('-')
175                || line.trim_start().starts_with('*')
176                || line.trim_start().starts_with('+')
177                || line.trim_start().starts_with("1.")
178            {
179                return None;
180            }
181
182            // Skip front matter delimiters or lines within front matter
183            if line.trim() == "---" || Self::is_in_front_matter(content, line_num - 1) {
184                return None;
185            }
186
187            if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
188                .ok()
189                .and_then(|re| re.captures(next_line))
190            {
191                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
192                if underline_indent == line_indentation {
193                    let heading = Heading {
194                        text: line[line_indentation.len()..].to_string(),
195                        level: 1,
196                        style: HeadingStyle::Setext1,
197                        line_number: line_num,
198                        original_text: format!("{line}\n{next_line}"),
199                        indentation: line_indentation.clone(),
200                    };
201                    return Some(heading);
202                }
203            } else if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
204                .ok()
205                .and_then(|re| re.captures(next_line))
206            {
207                let underline_indent = captures.get(1).map_or("", |m| m.as_str());
208                if underline_indent == line_indentation {
209                    let heading = Heading {
210                        text: line[line_indentation.len()..].to_string(),
211                        level: 2,
212                        style: HeadingStyle::Setext2,
213                        line_number: line_num,
214                        original_text: format!("{line}\n{next_line}"),
215                        indentation: line_indentation.clone(),
216                    };
217                    return Some(heading);
218                }
219            }
220        }
221
222        None
223    }
224
225    /// Get the indentation level of a line
226    pub fn get_indentation(line: &str) -> usize {
227        line.len() - line.trim_start().len()
228    }
229
230    /// Convert a heading to a different style
231    pub fn convert_heading_style(text_content: &str, level: u32, style: HeadingStyle) -> String {
232        // Validate heading level
233        let level = level.clamp(1, 6);
234
235        if text_content.trim().is_empty() {
236            // Empty headings: ATX can be just `##`, Setext requires text so return empty
237            return match style {
238                HeadingStyle::Atx => "#".repeat(level as usize),
239                HeadingStyle::AtxClosed => {
240                    let hashes = "#".repeat(level as usize);
241                    format!("{hashes} {hashes}")
242                }
243                HeadingStyle::Setext1 | HeadingStyle::Setext2 => String::new(),
244                // These are meta-styles resolved before calling this function
245                HeadingStyle::Consistent | HeadingStyle::SetextWithAtx | HeadingStyle::SetextWithAtxClosed => {
246                    "#".repeat(level as usize)
247                }
248            };
249        }
250
251        let indentation = text_content
252            .chars()
253            .take_while(|c| c.is_whitespace())
254            .collect::<String>();
255        let text_content = text_content.trim();
256
257        match style {
258            HeadingStyle::Atx => {
259                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
260            }
261            HeadingStyle::AtxClosed => {
262                format!(
263                    "{}{} {} {}",
264                    indentation,
265                    "#".repeat(level as usize),
266                    text_content,
267                    "#".repeat(level as usize)
268                )
269            }
270            HeadingStyle::Setext1 | HeadingStyle::Setext2 => {
271                if level > 2 {
272                    // Fall back to ATX style for levels > 2
273                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
274                } else {
275                    let underline_char = if level == 1 || style == HeadingStyle::Setext1 {
276                        '='
277                    } else {
278                        '-'
279                    };
280                    let visible_length = text_content.chars().count();
281                    let underline_length = visible_length.max(1); // Ensure at least 1 underline char
282                    format!(
283                        "{}{}\n{}{}",
284                        indentation,
285                        text_content,
286                        indentation,
287                        underline_char.to_string().repeat(underline_length)
288                    )
289                }
290            }
291            HeadingStyle::Consistent => {
292                // For Consistent style, default to ATX as it's the most commonly used
293                format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
294            }
295            HeadingStyle::SetextWithAtx => {
296                if level <= 2 {
297                    // Use Setext for h1/h2
298                    let underline_char = if level == 1 { '=' } else { '-' };
299                    let visible_length = text_content.chars().count();
300                    let underline_length = visible_length.max(1);
301                    format!(
302                        "{}{}\n{}{}",
303                        indentation,
304                        text_content,
305                        indentation,
306                        underline_char.to_string().repeat(underline_length)
307                    )
308                } else {
309                    // Use ATX for h3-h6
310                    format!("{}{} {}", indentation, "#".repeat(level as usize), text_content)
311                }
312            }
313            HeadingStyle::SetextWithAtxClosed => {
314                if level <= 2 {
315                    // Use Setext for h1/h2
316                    let underline_char = if level == 1 { '=' } else { '-' };
317                    let visible_length = text_content.chars().count();
318                    let underline_length = visible_length.max(1);
319                    format!(
320                        "{}{}\n{}{}",
321                        indentation,
322                        text_content,
323                        indentation,
324                        underline_char.to_string().repeat(underline_length)
325                    )
326                } else {
327                    // Use ATX closed for h3-h6
328                    format!(
329                        "{}{} {} {}",
330                        indentation,
331                        "#".repeat(level as usize),
332                        text_content,
333                        "#".repeat(level as usize)
334                    )
335                }
336            }
337        }
338    }
339
340    /// Get the text content of a heading line
341    pub fn get_heading_text(line: &str) -> Option<String> {
342        get_cached_regex(ATX_PATTERN_STR)
343            .ok()
344            .and_then(|re| re.captures(line))
345            .map(|captures| captures.get(4).map_or("", |m| m.as_str()).trim().to_string())
346    }
347
348    /// Detect emphasis-only lines
349    pub fn is_emphasis_only_line(line: &str) -> bool {
350        let trimmed = line.trim();
351        get_cached_regex(SINGLE_LINE_ASTERISK_EMPHASIS_STR)
352            .map(|re| re.is_match(trimmed))
353            .unwrap_or(false)
354            || get_cached_regex(SINGLE_LINE_UNDERSCORE_EMPHASIS_STR)
355                .map(|re| re.is_match(trimmed))
356                .unwrap_or(false)
357            || get_cached_regex(SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR)
358                .map(|re| re.is_match(trimmed))
359                .unwrap_or(false)
360            || get_cached_regex(SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR)
361                .map(|re| re.is_match(trimmed))
362                .unwrap_or(false)
363    }
364
365    /// Extract text from an emphasis-only line
366    pub fn extract_emphasis_text(line: &str) -> Option<(String, u32)> {
367        let trimmed = line.trim();
368
369        if let Some(caps) = get_cached_regex(SINGLE_LINE_ASTERISK_EMPHASIS_STR)
370            .ok()
371            .and_then(|re| re.captures(trimmed))
372        {
373            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
374        }
375
376        if let Some(caps) = get_cached_regex(SINGLE_LINE_UNDERSCORE_EMPHASIS_STR)
377            .ok()
378            .and_then(|re| re.captures(trimmed))
379        {
380            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 1));
381        }
382
383        if let Some(caps) = get_cached_regex(SINGLE_LINE_DOUBLE_ASTERISK_EMPHASIS_STR)
384            .ok()
385            .and_then(|re| re.captures(trimmed))
386        {
387            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
388        }
389
390        if let Some(caps) = get_cached_regex(SINGLE_LINE_DOUBLE_UNDERSCORE_EMPHASIS_STR)
391            .ok()
392            .and_then(|re| re.captures(trimmed))
393        {
394            return Some((caps.get(1).unwrap().as_str().trim().to_string(), 2));
395        }
396
397        None
398    }
399
400    /// Convert emphasis to heading
401    pub fn convert_emphasis_to_heading(line: &str) -> Option<String> {
402        // Preserve the original indentation
403        let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
404        // Preserve trailing spaces at the end of the line
405        let trailing = if line.ends_with(" ") {
406            line.chars().rev().take_while(|c| c.is_whitespace()).collect::<String>()
407        } else {
408            String::new()
409        };
410
411        if let Some((text, level)) = Self::extract_emphasis_text(line) {
412            // Preserve the original indentation and trailing spaces
413            Some(format!(
414                "{}{} {}{}",
415                indentation,
416                "#".repeat(level as usize),
417                text,
418                trailing
419            ))
420        } else {
421            None
422        }
423    }
424
425    /// Convert a heading text to a valid ID for fragment links
426    pub fn heading_to_fragment(text: &str) -> String {
427        // Remove any HTML tags
428        let text_no_html = get_cached_regex(HTML_TAG_REGEX_STR)
429            .map(|re| re.replace_all(text, ""))
430            .unwrap_or_else(|_| text.into());
431
432        // Convert to lowercase and trim
433        let text_lower = text_no_html.trim().to_lowercase();
434
435        // Replace spaces and punctuation with hyphens
436        let text_with_hyphens = text_lower
437            .chars()
438            .map(|c| if c.is_alphanumeric() { c } else { '-' })
439            .collect::<String>();
440
441        // Replace multiple consecutive hyphens with a single hyphen
442        let text_clean = text_with_hyphens
443            .split('-')
444            .filter(|s| !s.is_empty())
445            .collect::<Vec<_>>()
446            .join("-");
447
448        // Remove leading and trailing hyphens
449        text_clean.trim_matches('-').to_string()
450    }
451
452    /// Check if a line is in front matter
453    pub fn is_in_front_matter(content: &str, line_number: usize) -> bool {
454        let lines: Vec<&str> = content.lines().collect();
455        if lines.is_empty() || line_number >= lines.len() {
456            return false;
457        }
458
459        // Check if the document starts with front matter
460        if !lines[0].trim_start().eq("---") {
461            return false;
462        }
463
464        let mut in_front_matter = true;
465        let mut found_closing = false;
466
467        // Skip the first line (opening delimiter)
468        for (i, line) in lines.iter().enumerate().skip(1) {
469            if i > line_number {
470                break;
471            }
472
473            if line.trim_start().eq("---") {
474                found_closing = true;
475                in_front_matter = i > line_number;
476                break;
477            }
478        }
479
480        in_front_matter && !found_closing
481    }
482}
483
484/// Checks if a line is a heading
485#[inline]
486pub fn is_heading(line: &str) -> bool {
487    // Fast path checks first
488    let trimmed = line.trim();
489    if trimmed.is_empty() {
490        return false;
491    }
492
493    if trimmed.starts_with('#') {
494        // Check for ATX heading
495        get_cached_regex(ATX_PATTERN_STR)
496            .map(|re| re.is_match(line))
497            .unwrap_or(false)
498    } else {
499        // We can't tell for setext headings without looking at the next line
500        false
501    }
502}
503
504/// Checks if a line is a setext heading marker
505#[inline]
506pub fn is_setext_heading_marker(line: &str) -> bool {
507    get_cached_regex(SETEXT_HEADING_1_STR)
508        .map(|re| re.is_match(line))
509        .unwrap_or(false)
510        || get_cached_regex(SETEXT_HEADING_2_STR)
511            .map(|re| re.is_match(line))
512            .unwrap_or(false)
513}
514
515/// Checks if a line is a setext heading by examining its next line
516#[inline]
517pub fn is_setext_heading(lines: &[&str], index: usize) -> bool {
518    if index >= lines.len() - 1 {
519        return false;
520    }
521
522    let current_line = lines[index];
523    let next_line = lines[index + 1];
524
525    // Skip if current line is empty
526    if current_line.trim().is_empty() {
527        return false;
528    }
529
530    // Check if next line is a setext heading marker with same indentation
531    let current_indentation = current_line
532        .chars()
533        .take_while(|c| c.is_whitespace())
534        .collect::<String>();
535
536    if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
537        .ok()
538        .and_then(|re| re.captures(next_line))
539    {
540        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
541        return underline_indent == current_indentation;
542    }
543
544    if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
545        .ok()
546        .and_then(|re| re.captures(next_line))
547    {
548        let underline_indent = captures.get(1).map_or("", |m| m.as_str());
549        return underline_indent == current_indentation;
550    }
551
552    false
553}
554
555/// Get the heading level for a line
556#[inline]
557pub fn get_heading_level(lines: &[&str], index: usize) -> u32 {
558    if index >= lines.len() {
559        return 0;
560    }
561
562    let line = lines[index];
563
564    // Check for ATX style heading
565    if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
566        let hashes = captures.get(2).map_or("", |m| m.as_str());
567        return hashes.len() as u32;
568    }
569
570    // Check for setext style heading
571    if index < lines.len() - 1 {
572        let next_line = lines[index + 1];
573
574        if get_cached_regex(SETEXT_HEADING_1_STR)
575            .map(|re| re.is_match(next_line))
576            .unwrap_or(false)
577        {
578            return 1;
579        }
580
581        if get_cached_regex(SETEXT_HEADING_2_STR)
582            .map(|re| re.is_match(next_line))
583            .unwrap_or(false)
584        {
585            return 2;
586        }
587    }
588
589    0
590}
591
592/// Extract the text content from a heading
593#[inline]
594pub fn extract_heading_text(lines: &[&str], index: usize) -> String {
595    if index >= lines.len() {
596        return String::new();
597    }
598
599    let line = lines[index];
600
601    // Extract from ATX heading
602    if let Some(captures) = get_cached_regex(ATX_PATTERN_STR).ok().and_then(|re| re.captures(line)) {
603        return captures.get(4).map_or("", |m| m.as_str()).trim().to_string();
604    }
605
606    // Extract from setext heading
607    if index < lines.len() - 1 {
608        let next_line = lines[index + 1];
609        let line_indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
610
611        if let Some(captures) = get_cached_regex(SETEXT_HEADING_1_STR)
612            .ok()
613            .and_then(|re| re.captures(next_line))
614        {
615            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
616            if underline_indent == line_indentation {
617                return line[line_indentation.len()..].trim().to_string();
618            }
619        }
620
621        if let Some(captures) = get_cached_regex(SETEXT_HEADING_2_STR)
622            .ok()
623            .and_then(|re| re.captures(next_line))
624        {
625            let underline_indent = captures.get(1).map_or("", |m| m.as_str());
626            if underline_indent == line_indentation {
627                return line[line_indentation.len()..].trim().to_string();
628            }
629        }
630    }
631
632    line.trim().to_string()
633}
634
/// Get the indentation of `lines[index]` as the byte length of its leading
/// whitespace; 0 when `index` is out of range.
#[inline]
pub fn get_heading_indentation(lines: &[&str], index: usize) -> usize {
    lines
        .get(index)
        .map_or(0, |line| line.len() - line.trim_start().len())
}
645
646/// Check if a line is a code block delimiter
647#[inline]
648pub fn is_code_block_delimiter(line: &str) -> bool {
649    get_cached_regex(FENCED_CODE_BLOCK_START_STR)
650        .map(|re| re.is_match(line))
651        .unwrap_or(false)
652        || get_cached_regex(FENCED_CODE_BLOCK_END_STR)
653            .map(|re| re.is_match(line))
654            .unwrap_or(false)
655}
656
657/// Check if a line is a front matter delimiter
658#[inline]
659pub fn is_front_matter_delimiter(line: &str) -> bool {
660    get_cached_regex(FRONT_MATTER_DELIMITER_STR)
661        .map(|re| re.is_match(line))
662        .unwrap_or(false)
663}
664
/// Remove trailing hashes from a heading
///
/// Trailing whitespace is always removed. If the remaining text ends with a
/// run of `#`, that run and any whitespace before it are removed as well.
/// Hashes that are not at the very end are left untouched.
///
/// Works on string slices throughout, so text containing multi-byte
/// characters is handled correctly.
#[inline]
pub fn remove_trailing_hashes(text: &str) -> String {
    text.trim_end()
        // No-op when the text does not end with `#`.
        .trim_end_matches('#')
        .trim_end()
        .to_string()
}
696
697/// Normalize a heading to the specified level
698#[inline]
699pub fn normalize_heading(line: &str, level: u32) -> String {
700    let indentation = line.chars().take_while(|c| c.is_whitespace()).collect::<String>();
701    let trimmed = line.trim_start();
702
703    if trimmed.starts_with('#') {
704        if let Some(text) = HeadingUtils::get_heading_text(line) {
705            format!("{}{} {}", indentation, "#".repeat(level as usize), text)
706        } else {
707            line.to_string()
708        }
709    } else {
710        format!("{}{} {}", indentation, "#".repeat(level as usize), trimmed)
711    }
712}
713
714#[cfg(test)]
715mod tests {
716    use super::*;
717
718    #[test]
719    fn test_atx_heading_parsing() {
720        let content = "# Heading 1\n## Heading 2\n### Heading 3";
721        assert!(HeadingUtils::parse_heading(content, 1).is_some());
722        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
723        assert_eq!(HeadingUtils::parse_heading(content, 2).unwrap().level, 2);
724        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 3);
725    }
726
727    #[test]
728    fn test_setext_heading_parsing() {
729        let content = "Heading 1\n=========\nHeading 2\n---------";
730        assert!(HeadingUtils::parse_heading(content, 1).is_some());
731        assert_eq!(HeadingUtils::parse_heading(content, 1).unwrap().level, 1);
732        assert_eq!(HeadingUtils::parse_heading(content, 3).unwrap().level, 2);
733    }
734
735    #[test]
736    fn test_heading_style_conversion() {
737        assert_eq!(
738            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Atx),
739            "# Heading 1"
740        );
741        assert_eq!(
742            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::AtxClosed),
743            "## Heading 2 ##"
744        );
745        assert_eq!(
746            HeadingUtils::convert_heading_style("Heading 1", 1, HeadingStyle::Setext1),
747            "Heading 1\n========="
748        );
749        assert_eq!(
750            HeadingUtils::convert_heading_style("Heading 2", 2, HeadingStyle::Setext2),
751            "Heading 2\n---------"
752        );
753    }
754
755    #[test]
756    fn test_code_block_detection() {
757        let content = "# Heading\n```\n# Not a heading\n```\n# Another heading";
758        assert!(!HeadingUtils::is_in_code_block(content, 0));
759        assert!(HeadingUtils::is_in_code_block(content, 2));
760        assert!(!HeadingUtils::is_in_code_block(content, 4));
761    }
762
763    #[test]
764    fn test_empty_line_with_dashes() {
765        // Test that an empty line followed by dashes is not considered a heading
766        let content = "\n---";
767
768        // Empty line is at index 0, dashes at index 1
769        assert_eq!(
770            HeadingUtils::parse_heading(content, 1),
771            None,
772            "Empty line followed by dashes should not be detected as a heading"
773        );
774
775        // Also test with a regular horizontal rule
776        let content2 = "Some content\n\n---\nMore content";
777        assert_eq!(
778            HeadingUtils::parse_heading(content2, 2),
779            None,
780            "Empty line followed by horizontal rule should not be detected as a heading"
781        );
782    }
783
784    #[test]
785    fn test_is_atx_heading() {
786        assert!(HeadingUtils::is_atx_heading("# Heading"));
787        assert!(HeadingUtils::is_atx_heading("## Heading"));
788        assert!(HeadingUtils::is_atx_heading("### Heading"));
789        assert!(HeadingUtils::is_atx_heading("#### Heading"));
790        assert!(HeadingUtils::is_atx_heading("##### Heading"));
791        assert!(HeadingUtils::is_atx_heading("###### Heading"));
792        assert!(HeadingUtils::is_atx_heading("  # Indented"));
793        assert!(HeadingUtils::is_atx_heading("# Heading #"));
794        assert!(HeadingUtils::is_atx_heading("## Heading ###"));
795
796        assert!(!HeadingUtils::is_atx_heading("####### Too many"));
797        assert!(!HeadingUtils::is_atx_heading("Not a heading"));
798        assert!(HeadingUtils::is_atx_heading("#")); // Single # is a valid heading
799        assert!(!HeadingUtils::is_atx_heading(""));
800    }
801
802    #[test]
803    fn test_heading_edge_cases() {
804        // Test invalid line numbers
805        let content = "# Heading";
806        assert!(HeadingUtils::parse_heading(content, 0).is_none());
807        assert!(HeadingUtils::parse_heading(content, 10).is_none());
808
809        // Test headings in code blocks
810        let content = "```\n# Not a heading\n```";
811        assert!(HeadingUtils::parse_heading(content, 2).is_none());
812
813        // Test with tildes for code blocks
814        let content = "~~~\n# Not a heading\n~~~";
815        assert!(HeadingUtils::is_in_code_block(content, 2));
816
817        // Test mixed fence characters
818        let content = "```\n# Content\n~~~"; // Mismatched fences
819        assert!(HeadingUtils::is_in_code_block(content, 2));
820    }
821
#[test]
fn test_atx_closed_heading_variations() {
    let document = "# Heading #\n## Heading ##\n### Heading ####\n#### Heading ##";

    // First line: matching closing hash, and the text excludes the markers.
    let first = HeadingUtils::parse_heading(document, 1).unwrap();
    assert_eq!(first.style, HeadingStyle::AtxClosed);
    assert_eq!(first.text, "Heading");

    // Line 2 has matching closing hashes (closed ATX); lines 3 and 4 have a
    // mismatched closing run and are expected to be reported as plain ATX.
    for (line_number, expected_style) in [
        (2, HeadingStyle::AtxClosed),
        (3, HeadingStyle::Atx),
        (4, HeadingStyle::Atx),
    ] {
        let heading = HeadingUtils::parse_heading(document, line_number).unwrap();
        assert_eq!(
            heading.style, expected_style,
            "unexpected style on line {}",
            line_number
        );
    }
}
839
#[test]
fn test_setext_heading_edge_cases() {
    // Line 1 must not be a heading when it is a list item above a dash run,
    // or when it is the opening delimiter of front matter.
    for rejected in ["- List item\n---------", "---\ntitle: test\n---"] {
        assert!(
            HeadingUtils::parse_heading(rejected, 1).is_none(),
            "line 1 of {:?} should not be a heading",
            rejected
        );
    }

    // Text and underline indented by the same amount form a heading whose
    // indentation is reported separately from its text.
    let indented = HeadingUtils::parse_heading("  Indented\n  ========", 1).unwrap();
    assert_eq!(indented.indentation, "  ");
    assert_eq!(indented.text, "Indented");

    // Indented text over an unindented underline is rejected.
    assert!(HeadingUtils::parse_heading("  Text\n========", 1).is_none());
}
860
#[test]
fn test_get_indentation() {
    // (input line, expected indentation); a tab is expected to count as 1.
    let cases = [
        ("# Heading", 0),
        ("  # Heading", 2),
        ("    # Heading", 4),
        ("\t# Heading", 1),
        ("", 0),
    ];

    for (line, expected) in cases {
        assert_eq!(
            HeadingUtils::get_indentation(line),
            expected,
            "indentation of {:?}",
            line
        );
    }
}
869
#[test]
fn test_convert_heading_style_edge_cases() {
    // Each row: (text, level, target style, expected output).
    let cases = [
        // Empty text: ATX headings produce just the hash marks (valid markdown)
        ("", 1, HeadingStyle::Atx, "#"),
        ("   ", 1, HeadingStyle::Atx, "#"),
        ("", 2, HeadingStyle::Atx, "##"),
        ("", 1, HeadingStyle::AtxClosed, "# #"),
        // Setext cannot represent empty headings, returns empty
        ("", 1, HeadingStyle::Setext1, ""),
        // Level clamping: 0 behaves like 1, 10 behaves like 6
        ("Text", 0, HeadingStyle::Atx, "# Text"),
        ("Text", 10, HeadingStyle::Atx, "###### Text"),
        // Setext with level > 2 falls back to ATX
        ("Text", 3, HeadingStyle::Setext1, "### Text"),
        // Leading indentation of the text is preserved in front of the marker
        ("  Text", 1, HeadingStyle::Atx, "  # Text"),
        // Very short text for setext: underline matches the text length
        ("Hi", 1, HeadingStyle::Setext1, "Hi\n=="),
    ];

    for (text, level, style, expected) in cases {
        assert_eq!(
            HeadingUtils::convert_heading_style(text, level, style),
            expected,
            "converting {:?} at level {} to {:?}",
            text,
            level,
            style
        );
    }
}
911
#[test]
fn test_get_heading_text() {
    // (input line, expected heading text); `None` means "not a heading".
    let cases = [
        ("# Heading", Some("Heading")),
        ("## Heading ##", Some("Heading")),
        ("###   Spaces   ", Some("Spaces")),
        ("Not a heading", None),
        ("", None),
    ];

    for (line, expected) in cases {
        assert_eq!(
            HeadingUtils::get_heading_text(line),
            expected.map(String::from),
            "heading text of {:?}",
            line
        );
    }
}
926
#[test]
fn test_emphasis_detection() {
    // Lines consisting solely of one emphasis or strong span, optionally
    // surrounded by whitespace.
    let emphasis_only = ["*emphasis*", "_emphasis_", "**strong**", "__strong__", "  *emphasis*  "];
    for line in emphasis_only {
        assert!(
            HeadingUtils::is_emphasis_only_line(line),
            "{:?} should be an emphasis-only line",
            line
        );
    }

    // Emphasis mixed with other text, a bare marker pair, and the empty line.
    let not_emphasis_only = ["*not* emphasis", "text *emphasis*", "**", ""];
    for line in not_emphasis_only {
        assert!(
            !HeadingUtils::is_emphasis_only_line(line),
            "{:?} should not be an emphasis-only line",
            line
        );
    }
}
940
#[test]
fn test_extract_emphasis_text() {
    // (input line, expected (inner text, level)); single markers are expected
    // to give level 1 and doubled markers level 2.
    let cases = [
        ("*text*", Some(("text", 1))),
        ("_text_", Some(("text", 1))),
        ("**text**", Some(("text", 2))),
        ("__text__", Some(("text", 2))),
        ("  *spaced*  ", Some(("spaced", 1))),
        ("not emphasis", None),
        ("*not* complete", None),
    ];

    for (line, expected) in cases {
        let expected = expected.map(|(text, level)| (text.to_string(), level));
        assert_eq!(
            HeadingUtils::extract_emphasis_text(line),
            expected,
            "emphasis extracted from {:?}",
            line
        );
    }
}
967
#[test]
fn test_convert_emphasis_to_heading() {
    // (input line, expected heading line). Leading and trailing whitespace
    // around the emphasis is expected to be carried over into the result.
    let cases = [
        ("*text*", Some("# text")),
        ("**text**", Some("## text")),
        ("  *text*", Some("  # text")),
        ("*text* ", Some("# text ")),
        ("not emphasis", None),
    ];

    for (line, expected) in cases {
        assert_eq!(
            HeadingUtils::convert_emphasis_to_heading(line),
            expected.map(String::from),
            "heading converted from {:?}",
            line
        );
    }
}
989
#[test]
fn test_heading_to_fragment() {
    // (heading text, expected URL fragment).
    let cases = [
        ("Simple Heading", "simple-heading"),
        ("Heading with Numbers 123", "heading-with-numbers-123"),
        ("Special!@#$%Characters", "special-characters"),
        ("  Trimmed  ", "trimmed"),
        ("Multiple   Spaces", "multiple-spaces"),
        ("Heading <em>with HTML</em>", "heading-with-html"),
        ("---Leading-Dashes---", "leading-dashes"),
        ("", ""),
    ];

    for (heading, expected) in cases {
        assert_eq!(
            HeadingUtils::heading_to_fragment(heading),
            expected,
            "fragment for {:?}",
            heading
        );
    }
}
1016
#[test]
fn test_is_in_front_matter() {
    // Closed front matter: only the line after the opening delimiter counts.
    // The closing delimiter (index 2) and everything after it do not.
    let closed = "---\ntitle: Test\n---\n# Content";
    for (line, expected) in [(1, true), (2, false), (3, false), (4, false)] {
        assert_eq!(
            HeadingUtils::is_in_front_matter(closed, line),
            expected,
            "line {} of closed front matter",
            line
        );
    }

    // No front matter
    let plain = "# Just content";
    assert!(!HeadingUtils::is_in_front_matter(plain, 0));

    // Unclosed front matter: every following line is still treated as inside.
    let unclosed = "---\ntitle: Test\n# No closing";
    for line in [1, 2] {
        assert!(
            HeadingUtils::is_in_front_matter(unclosed, line),
            "line {} of unclosed front matter",
            line
        );
    }

    // Front matter not at start
    let late_delimiters = "# Heading\n---\ntitle: Test\n---";
    assert!(!HeadingUtils::is_in_front_matter(late_delimiters, 2));
}
1038
#[test]
fn test_module_level_functions() {
    // is_heading: ATX lines (optionally indented) pass; plain text and "" do not.
    for line in ["# Heading", "  ## Indented"] {
        assert!(is_heading(line), "{:?} should be a heading", line);
    }
    for line in ["Not a heading", ""] {
        assert!(!is_heading(line), "{:?} should not be a heading", line);
    }

    // is_setext_heading_marker: runs of `=` or `-`, optionally indented.
    for marker in ["========", "--------", "  ======"] {
        assert!(
            is_setext_heading_marker(marker),
            "{:?} should be a setext marker",
            marker
        );
    }
    assert!(!is_setext_heading_marker("# Heading"));
    assert!(is_setext_heading_marker("---")); // Three dashes is valid

    // is_setext_heading: the text line above the underline must be non-empty.
    let titled = vec!["Title", "====="];
    assert!(is_setext_heading(&titled, 0));

    let untitled = vec!["", "====="];
    assert!(!is_setext_heading(&untitled, 0));

    // get_heading_level: (line index, expected level); an out-of-range index gives 0.
    let leveled = vec!["# H1", "## H2", "### H3"];
    for (index, expected_level) in [(0, 1), (1, 2), (2, 3), (10, 0)] {
        assert_eq!(
            get_heading_level(&leveled, index),
            expected_level,
            "heading level at index {}",
            index
        );
    }

    // extract_heading_text: markers and trailing closing hashes are removed.
    let texts = vec!["# Heading Text", "## Another ###"];
    for (index, expected_text) in [(0, "Heading Text"), (1, "Another")] {
        assert_eq!(
            extract_heading_text(&texts, index),
            expected_text,
            "heading text at index {}",
            index
        );
    }

    // get_heading_indentation: (line index, expected leading-space count).
    let indented = vec!["# No indent", "  ## Two spaces", "    ### Four spaces"];
    for (index, expected_indent) in [(0, 0), (1, 2), (2, 4)] {
        assert_eq!(
            get_heading_indentation(&indented, index),
            expected_indent,
            "heading indentation at index {}",
            index
        );
    }
}
1079
#[test]
fn test_is_code_block_delimiter() {
    // Fences of three or more backticks/tildes, with an optional info string
    // or leading indentation.
    for fence in ["```", "~~~", "````", "```rust", "  ```"] {
        assert!(
            is_code_block_delimiter(fence),
            "{:?} should be a code block delimiter",
            fence
        );
    }

    // Two backticks are too short, and a heading is not a fence.
    for line in ["``", "# Heading"] {
        assert!(
            !is_code_block_delimiter(line),
            "{:?} should not be a code block delimiter",
            line
        );
    }
}
1091
#[test]
fn test_is_front_matter_delimiter() {
    // Exactly three dashes, with optional trailing whitespace.
    for line in ["---", "---  "] {
        assert!(
            is_front_matter_delimiter(line),
            "{:?} should be a front matter delimiter",
            line
        );
    }

    // Wrong dash count, or dashes that do not start the line.
    for line in ["----", "--", "# ---"] {
        assert!(
            !is_front_matter_delimiter(line),
            "{:?} should not be a front matter delimiter",
            line
        );
    }
}
1101
#[test]
fn test_remove_trailing_hashes() {
    // (input, expected result after stripping a trailing run of hashes).
    let cases = [
        ("Heading ###", "Heading"),
        ("Heading ## ", "Heading"),
        ("Heading #not trailing", "Heading #not trailing"),
        ("No hashes", "No hashes"),
        ("", ""),
        // Test the specific case that was failing
        ("Heading ##", "Heading"),
        ("Heading #", "Heading"),
        ("Heading ####", "Heading"),
        // Edge cases: hash-only input, and hashes attached directly to text
        ("#", ""),
        ("##", ""),
        ("###", ""),
        ("Text#", "Text"),
        ("Text ##", "Text"),
    ];

    for (input, expected) in cases {
        assert_eq!(
            remove_trailing_hashes(input),
            expected,
            "trailing hashes removed from {:?}",
            input
        );
    }
}
1122
#[test]
fn test_normalize_heading() {
    // (input line, target level, expected normalized line).
    let cases = [
        ("# Old Level", 3, "### Old Level"),
        ("## Heading ##", 1, "# Heading"),
        ("  # Indented", 2, "  ## Indented"),
        ("Plain text", 1, "# Plain text"),
    ];

    for (line, level, expected) in cases {
        assert_eq!(
            normalize_heading(line, level),
            expected,
            "normalizing {:?} to level {}",
            line,
            level
        );
    }
}
1130
#[test]
fn test_heading_style_from_str() {
    // (input, expected parse result). Upper-case input and both `_` and `-`
    // separators are expected to parse; unknown names give `Err(())`.
    let cases = [
        ("atx", Ok(HeadingStyle::Atx)),
        ("ATX", Ok(HeadingStyle::Atx)),
        ("atx_closed", Ok(HeadingStyle::AtxClosed)),
        ("atx-closed", Ok(HeadingStyle::AtxClosed)),
        ("ATX-CLOSED", Ok(HeadingStyle::AtxClosed)),
        ("setext1", Ok(HeadingStyle::Setext1)),
        ("setext", Ok(HeadingStyle::Setext1)),
        ("setext2", Ok(HeadingStyle::Setext2)),
        ("consistent", Ok(HeadingStyle::Consistent)),
        ("setext_with_atx", Ok(HeadingStyle::SetextWithAtx)),
        ("setext-with-atx", Ok(HeadingStyle::SetextWithAtx)),
        ("setext_with_atx_closed", Ok(HeadingStyle::SetextWithAtxClosed)),
        ("setext-with-atx-closed", Ok(HeadingStyle::SetextWithAtxClosed)),
        ("invalid", Err(())),
    ];

    for (input, expected) in cases {
        assert_eq!(
            HeadingStyle::from_str(input),
            expected,
            "parsing {:?}",
            input
        );
    }
}
1160
#[test]
fn test_heading_style_display() {
    // Every `HeadingStyle` variant paired with the string its `Display` impl
    // maps it to, including the two combined setext/ATX styles.
    let cases = [
        (HeadingStyle::Atx, "atx"),
        (HeadingStyle::AtxClosed, "atx-closed"),
        (HeadingStyle::Setext1, "setext1"),
        (HeadingStyle::Setext2, "setext2"),
        (HeadingStyle::Consistent, "consistent"),
        (HeadingStyle::SetextWithAtx, "setext-with-atx"),
        (HeadingStyle::SetextWithAtxClosed, "setext-with-atx-closed"),
    ];

    for (style, expected) in cases {
        assert_eq!(style.to_string(), expected, "display of {:?}", style);
    }
}
1169
#[test]
fn test_unicode_headings() {
    // CJK, accented Latin and emoji heading text must come back unchanged.
    let document = "# 你好世界\n## Ñoño\n### 🚀 Emoji";
    for (line_number, expected_text) in [(1, "你好世界"), (2, "Ñoño"), (3, "🚀 Emoji")] {
        let heading = HeadingUtils::parse_heading(document, line_number).unwrap();
        assert_eq!(
            heading.text, expected_text,
            "heading text on line {}",
            line_number
        );
    }

    // Test fragment generation with unicode: non-ASCII letters are kept,
    // and Latin text is lower-cased and hyphenated.
    for (heading, expected_fragment) in [("你好世界", "你好世界"), ("Café René", "café-rené")] {
        assert_eq!(
            HeadingUtils::heading_to_fragment(heading),
            expected_fragment,
            "fragment for {:?}",
            heading
        );
    }
}
1181
#[test]
fn test_complex_nested_structures() {
    // Code block inside front matter (edge case).
    // YAML multi-line strings are not handled specially, so a ``` fence inside
    // the front matter is treated as the start of a code block.
    let fenced_in_front_matter = "---\ncode: |\n  ```\n  # Not a heading\n  ```\n---\n# Real heading";
    assert!(HeadingUtils::is_in_code_block(fenced_in_front_matter, 4)); // Inside code block
    assert!(HeadingUtils::parse_heading(fenced_in_front_matter, 7).is_some());

    // Multiple code blocks: the heading on line 4 sits between a closed
    // backtick block and a following tilde block.
    let heading_between_blocks = "```\ncode\n```\n# Heading\n~~~\nmore code\n~~~";
    assert!(!HeadingUtils::is_in_code_block(heading_between_blocks, 4));
    assert!(HeadingUtils::parse_heading(heading_between_blocks, 4).is_some());
}
1196}