rumdl_lib/utils/
code_block_utils.rs

1//!
2//! Utility functions for detecting and handling code blocks and code spans in Markdown for rumdl.
3//!
4//! Code block detection is delegated to pulldown-cmark, which correctly implements the
5//! CommonMark specification. This handles edge cases like:
6//! - Backtick fences with backticks in the info string (invalid per spec)
7//! - Nested fences (longer fence contains shorter fence as content)
8//! - Mixed fence types (tilde fence contains backticks as content)
9//! - Indented code blocks with proper list context handling
10
11use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
12
/// Type alias for code block and span ranges: (code_blocks, code_spans)
///
/// Each element is a `(start, end)` pair of byte offsets into the analyzed content.
pub type CodeRanges = (Vec<(usize, usize)>, Vec<(usize, usize)>);
15
/// Classification of code blocks relative to list contexts
///
/// Produced by `CodeBlockUtils::analyze_code_block_context`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockContext {
    /// Code block that separates lists (root-level, with blank lines):
    /// the line is indented less than the required continuation indent and
    /// has at least one blank line directly before or after it.
    Standalone,
    /// Code block that continues a list (properly indented): the line's
    /// indent is at least the required continuation indent.
    Indented,
    /// Code block adjacent to list content (edge case, defaults to non-breaking).
    /// Also the result when the requested line index is out of range.
    Adjacent,
}
26
/// Utility functions for detecting and handling code blocks in Markdown
///
/// A field-less unit struct used purely as a namespace: every function in its
/// `impl` block is an associated function (no `self` receiver).
pub struct CodeBlockUtils;
29
30impl CodeBlockUtils {
31    /// Detect all code blocks in the content (NOT including inline code spans)
32    ///
33    /// Uses pulldown-cmark for spec-compliant CommonMark parsing. This correctly handles:
34    /// - Fenced code blocks (``` and ~~~)
35    /// - Indented code blocks (4 spaces or tab)
36    /// - Code blocks inside lists, blockquotes, and other containers
37    /// - Edge cases like backticks in info strings (which invalidate the fence)
38    ///
39    /// Returns a sorted vector of (start, end) byte offset tuples.
40    pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
41        let (blocks, _) = Self::detect_code_blocks_and_spans(content);
42        blocks
43    }
44
45    /// Returns code block ranges and inline code span ranges in a single pulldown-cmark pass.
46    pub fn detect_code_blocks_and_spans(content: &str) -> CodeRanges {
47        let mut blocks = Vec::new();
48        let mut spans = Vec::new();
49        let mut code_block_start: Option<usize> = None;
50
51        // Use pulldown-cmark with all extensions for maximum compatibility
52        let options = Options::all();
53        let parser = Parser::new_ext(content, options).into_offset_iter();
54
55        for (event, range) in parser {
56            match event {
57                Event::Start(Tag::CodeBlock(_)) => {
58                    // Record start position of code block
59                    code_block_start = Some(range.start);
60                }
61                Event::End(TagEnd::CodeBlock) => {
62                    // Complete the code block range
63                    if let Some(start) = code_block_start.take() {
64                        blocks.push((start, range.end));
65                    }
66                }
67                Event::Code(_) => {
68                    spans.push((range.start, range.end));
69                }
70                _ => {}
71            }
72        }
73
74        // Handle edge case: unclosed code block at end of content
75        // pulldown-cmark should handle this, but be defensive
76        if let Some(start) = code_block_start {
77            blocks.push((start, content.len()));
78        }
79
80        // Sort by start position (should already be sorted, but ensure consistency)
81        blocks.sort_by_key(|&(start, _)| start);
82        (blocks, spans)
83    }
84
85    /// Check if a position is within a code block (for compatibility)
86    pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
87        // This is a compatibility function - it only checks code blocks now, not spans
88        blocks.iter().any(|&(start, end)| pos >= start && pos < end)
89    }
90
91    /// Check if a position is within a code block (NOT including inline code spans)
92    pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
93        blocks.iter().any(|&(start, end)| pos >= start && pos < end)
94    }
95
96    /// Analyze code block context relative to list parsing
97    /// This is the core function implementing Design #3's three-tier classification
98    pub fn analyze_code_block_context(
99        lines: &[crate::lint_context::LineInfo],
100        line_idx: usize,
101        min_continuation_indent: usize,
102    ) -> CodeBlockContext {
103        if let Some(line_info) = lines.get(line_idx) {
104            // Rule 1: Indentation Analysis - Is it sufficiently indented for list continuation?
105            if line_info.indent >= min_continuation_indent {
106                return CodeBlockContext::Indented;
107            }
108
109            // Rule 2: Blank Line Context - Check for structural separation indicators
110            let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
111
112            // Rule 3: Standalone Detection - Insufficient indentation + blank line separation
113            // This is the key fix: root-level code blocks with blank lines separate lists
114            if prev_blanks > 0 || next_blanks > 0 {
115                return CodeBlockContext::Standalone;
116            }
117
118            // Rule 4: Default - Adjacent (conservative, non-breaking for edge cases)
119            CodeBlockContext::Adjacent
120        } else {
121            // Fallback for invalid line index
122            CodeBlockContext::Adjacent
123        }
124    }
125
126    /// Count blank lines before and after the given line index
127    fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
128        let mut prev_blanks = 0;
129        let mut next_blanks = 0;
130
131        // Count blank lines before (look backwards)
132        for i in (0..line_idx).rev() {
133            if let Some(line) = lines.get(i) {
134                if line.is_blank {
135                    prev_blanks += 1;
136                } else {
137                    break;
138                }
139            } else {
140                break;
141            }
142        }
143
144        // Count blank lines after (look forwards)
145        for i in (line_idx + 1)..lines.len() {
146            if let Some(line) = lines.get(i) {
147                if line.is_blank {
148                    next_blanks += 1;
149                } else {
150                    break;
151                }
152            } else {
153                break;
154            }
155        }
156
157        (prev_blanks, next_blanks)
158    }
159
160    /// Calculate minimum indentation required for code block to continue a list
161    /// Based on the most recent list item's marker width
162    pub fn calculate_min_continuation_indent(
163        content: &str,
164        lines: &[crate::lint_context::LineInfo],
165        current_line_idx: usize,
166    ) -> usize {
167        // Look backwards to find the most recent list item
168        for i in (0..current_line_idx).rev() {
169            if let Some(line_info) = lines.get(i) {
170                if let Some(list_item) = &line_info.list_item {
171                    // Calculate minimum continuation indent for this list item
172                    return if list_item.is_ordered {
173                        list_item.marker_column + list_item.marker.len() + 1 // +1 for space after marker
174                    } else {
175                        list_item.marker_column + 2 // Unordered lists need marker + space (min 2)
176                    };
177                }
178
179                // Stop at structural separators that would break list context
180                if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
181                    break;
182                }
183            }
184        }
185
186        0 // No list context found
187    }
188
189    /// Check if content is a structural separator (headings, horizontal rules, etc.)
190    fn is_structural_separator(content: &str) -> bool {
191        let trimmed = content.trim();
192        trimmed.starts_with("---")
193            || trimmed.starts_with("***")
194            || trimmed.starts_with("___")
195            || crate::utils::skip_context::is_table_line(trimmed)
196            || trimmed.starts_with(">") // Blockquotes
197    }
198
199    /// Detect fenced code blocks with markdown/md language tag.
200    ///
201    /// Returns a vector of `MarkdownCodeBlock` containing byte ranges for the
202    /// content between the fences (excluding the fence lines themselves).
203    ///
204    /// Only detects fenced code blocks (``` or ~~~), not indented code blocks,
205    /// since indented blocks don't have a language tag.
206    pub fn detect_markdown_code_blocks(content: &str) -> Vec<MarkdownCodeBlock> {
207        use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
208
209        let mut blocks = Vec::new();
210        let mut current_block: Option<MarkdownCodeBlockBuilder> = None;
211
212        let options = Options::all();
213        let parser = Parser::new_ext(content, options).into_offset_iter();
214
215        for (event, range) in parser {
216            match event {
217                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
218                    // Check if language is markdown or md (first word of info string)
219                    let language = info.split_whitespace().next().unwrap_or("");
220                    if language.eq_ignore_ascii_case("markdown") || language.eq_ignore_ascii_case("md") {
221                        // Find where content starts (after the opening fence line)
222                        let block_start = range.start;
223                        let content_start = content[block_start..]
224                            .find('\n')
225                            .map(|i| block_start + i + 1)
226                            .unwrap_or(content.len());
227
228                        current_block = Some(MarkdownCodeBlockBuilder { content_start });
229                    }
230                }
231                Event::End(TagEnd::CodeBlock) => {
232                    if let Some(builder) = current_block.take() {
233                        // Find where content ends (before the closing fence line)
234                        let block_end = range.end;
235
236                        // Validate range before slicing
237                        if builder.content_start > block_end || builder.content_start > content.len() {
238                            continue;
239                        }
240
241                        let search_range = &content[builder.content_start..block_end.min(content.len())];
242                        let content_end = search_range
243                            .rfind('\n')
244                            .map(|i| builder.content_start + i)
245                            .unwrap_or(builder.content_start);
246
247                        // Only add block if it has valid content range
248                        if content_end >= builder.content_start {
249                            blocks.push(MarkdownCodeBlock {
250                                content_start: builder.content_start,
251                                content_end,
252                            });
253                        }
254                    }
255                }
256                _ => {}
257            }
258        }
259
260        blocks
261    }
262}
263
/// Information about a markdown code block for recursive formatting
///
/// Produced by `CodeBlockUtils::detect_markdown_code_blocks`; both offsets index
/// into the content string that was passed to that function.
#[derive(Debug, Clone)]
pub struct MarkdownCodeBlock {
    /// Byte offset where the content starts (after opening fence line)
    pub content_start: usize,
    /// Byte offset where the content ends (before closing fence line).
    /// This is the position of the last newline inside the block, so that
    /// newline is excluded; it equals `content_start` when the block is empty.
    pub content_end: usize,
}
272
/// Builder for MarkdownCodeBlock during parsing
///
/// Created when the start of a markdown/md fenced block is seen and consumed
/// when the matching end event arrives.
struct MarkdownCodeBlockBuilder {
    /// Byte offset of the first content byte (right after the opening fence line).
    content_start: usize,
}
277
278#[cfg(test)]
279mod tests {
280    use super::*;
281
282    #[test]
283    fn test_detect_fenced_code_blocks() {
284        // The function detects fenced blocks and inline code spans
285        // Fence markers (``` at line start) are now skipped in inline span detection
286
287        // Basic fenced code block with backticks
288        let content = "Some text\n```\ncode here\n```\nMore text";
289        let blocks = CodeBlockUtils::detect_code_blocks(content);
290        // Should find: 1 fenced block (fences are no longer detected as inline spans)
291        assert_eq!(blocks.len(), 1);
292
293        // Check that we have the fenced block
294        let fenced_block = blocks
295            .iter()
296            .find(|(start, end)| end - start > 10 && content[*start..*end].contains("code here"));
297        assert!(fenced_block.is_some());
298
299        // Fenced code block with tildes (no inline code detection for ~)
300        let content = "Some text\n~~~\ncode here\n~~~\nMore text";
301        let blocks = CodeBlockUtils::detect_code_blocks(content);
302        assert_eq!(blocks.len(), 1);
303        assert_eq!(&content[blocks[0].0..blocks[0].1], "~~~\ncode here\n~~~");
304
305        // Multiple code blocks
306        let content = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
307        let blocks = CodeBlockUtils::detect_code_blocks(content);
308        // 2 fenced blocks (fence markers no longer detected as inline spans)
309        assert_eq!(blocks.len(), 2);
310    }
311
312    #[test]
313    fn test_detect_code_blocks_with_language() {
314        // Code block with language identifier
315        let content = "Text\n```rust\nfn main() {}\n```\nMore";
316        let blocks = CodeBlockUtils::detect_code_blocks(content);
317        // 1 fenced block (fence markers no longer detected as inline spans)
318        assert_eq!(blocks.len(), 1);
319        // Check we have the full fenced block
320        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("fn main"));
321        assert!(fenced.is_some());
322    }
323
324    #[test]
325    fn test_unclosed_code_block() {
326        // Unclosed code block should extend to end of content
327        let content = "Text\n```\ncode here\nno closing fence";
328        let blocks = CodeBlockUtils::detect_code_blocks(content);
329        assert_eq!(blocks.len(), 1);
330        assert_eq!(blocks[0].1, content.len());
331    }
332
333    #[test]
334    fn test_indented_code_blocks() {
335        // Basic indented code block
336        let content = "Paragraph\n\n    code line 1\n    code line 2\n\nMore text";
337        let blocks = CodeBlockUtils::detect_code_blocks(content);
338        assert_eq!(blocks.len(), 1);
339        assert!(content[blocks[0].0..blocks[0].1].contains("code line 1"));
340        assert!(content[blocks[0].0..blocks[0].1].contains("code line 2"));
341
342        // Indented code with tabs
343        let content = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
344        let blocks = CodeBlockUtils::detect_code_blocks(content);
345        assert_eq!(blocks.len(), 1);
346    }
347
348    #[test]
349    fn test_indented_code_requires_blank_line() {
350        // Indented lines without preceding blank line are not code blocks
351        let content = "Paragraph\n    indented but not code\nMore text";
352        let blocks = CodeBlockUtils::detect_code_blocks(content);
353        assert_eq!(blocks.len(), 0);
354
355        // With blank line, it becomes a code block
356        let content = "Paragraph\n\n    now it's code\nMore text";
357        let blocks = CodeBlockUtils::detect_code_blocks(content);
358        assert_eq!(blocks.len(), 1);
359    }
360
361    #[test]
362    fn test_indented_content_with_list_markers_is_code_block() {
363        // Per CommonMark spec: 4-space indented content after blank line IS a code block,
364        // even if the content looks like list markers. The indentation takes precedence.
365        // Verified with: echo 'List:\n\n    - Item 1' | npx commonmark
366        // Output: <pre><code>- Item 1</code></pre>
367        let content = "List:\n\n    - Item 1\n    - Item 2\n    * Item 3\n    + Item 4";
368        let blocks = CodeBlockUtils::detect_code_blocks(content);
369        assert_eq!(blocks.len(), 1); // This IS a code block per spec
370
371        // Same for numbered list markers
372        let content = "List:\n\n    1. First\n    2. Second";
373        let blocks = CodeBlockUtils::detect_code_blocks(content);
374        assert_eq!(blocks.len(), 1); // This IS a code block per spec
375    }
376
377    #[test]
378    fn test_actual_list_items_not_code_blocks() {
379        // Actual list items (no preceding blank line + 4 spaces) are NOT code blocks
380        let content = "- Item 1\n- Item 2\n* Item 3";
381        let blocks = CodeBlockUtils::detect_code_blocks(content);
382        assert_eq!(blocks.len(), 0);
383
384        // Nested list items
385        let content = "- Item 1\n  - Nested item\n- Item 2";
386        let blocks = CodeBlockUtils::detect_code_blocks(content);
387        assert_eq!(blocks.len(), 0);
388    }
389
390    #[test]
391    fn test_inline_code_spans_not_detected() {
392        // Inline code spans should NOT be detected as code blocks
393        let content = "Text with `inline code` here";
394        let blocks = CodeBlockUtils::detect_code_blocks(content);
395        assert_eq!(blocks.len(), 0); // No blocks, only inline spans
396
397        // Multiple backtick code span
398        let content = "Text with ``code with ` backtick`` here";
399        let blocks = CodeBlockUtils::detect_code_blocks(content);
400        assert_eq!(blocks.len(), 0); // No blocks, only inline spans
401
402        // Multiple code spans
403        let content = "Has `code1` and `code2` spans";
404        let blocks = CodeBlockUtils::detect_code_blocks(content);
405        assert_eq!(blocks.len(), 0); // No blocks, only inline spans
406    }
407
408    #[test]
409    fn test_unclosed_code_span() {
410        // Unclosed code span should not be detected
411        let content = "Text with `unclosed code span";
412        let blocks = CodeBlockUtils::detect_code_blocks(content);
413        assert_eq!(blocks.len(), 0);
414
415        // Mismatched backticks
416        let content = "Text with ``one style` different close";
417        let blocks = CodeBlockUtils::detect_code_blocks(content);
418        assert_eq!(blocks.len(), 0);
419    }
420
421    #[test]
422    fn test_mixed_code_blocks_and_spans() {
423        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
424        let blocks = CodeBlockUtils::detect_code_blocks(content);
425        // Should only detect the fenced block, NOT the inline spans
426        assert_eq!(blocks.len(), 1);
427
428        // Check we have the fenced block only
429        assert!(blocks.iter().any(|(s, e)| content[*s..*e].contains("block")));
430        // Should NOT detect inline spans
431        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span1`"));
432        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span2`"));
433    }
434
435    #[test]
436    fn test_is_in_code_block_or_span() {
437        let blocks = vec![(10, 20), (30, 40), (50, 60)];
438
439        // Test positions inside blocks
440        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 15));
441        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 35));
442        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 55));
443
444        // Test positions at boundaries
445        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 10)); // Start is inclusive
446        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 20)); // End is exclusive
447
448        // Test positions outside blocks
449        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 5));
450        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 25));
451        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 65));
452    }
453
454    #[test]
455    fn test_empty_content() {
456        let blocks = CodeBlockUtils::detect_code_blocks("");
457        assert_eq!(blocks.len(), 0);
458    }
459
460    #[test]
461    fn test_code_block_at_start() {
462        let content = "```\ncode\n```\nText after";
463        let blocks = CodeBlockUtils::detect_code_blocks(content);
464        // 1 fenced block (fence markers no longer detected as inline spans)
465        assert_eq!(blocks.len(), 1);
466        assert_eq!(blocks[0].0, 0); // Fenced block starts at 0
467    }
468
469    #[test]
470    fn test_code_block_at_end() {
471        let content = "Text before\n```\ncode\n```";
472        let blocks = CodeBlockUtils::detect_code_blocks(content);
473        // 1 fenced block (fence markers no longer detected as inline spans)
474        assert_eq!(blocks.len(), 1);
475        // Check we have the fenced block
476        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("code"));
477        assert!(fenced.is_some());
478    }
479
480    #[test]
481    fn test_nested_fence_markers() {
482        // Code block containing fence markers as content
483        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
484        let blocks = CodeBlockUtils::detect_code_blocks(content);
485        // Should detect: outer block, inner ```, outer ````
486        assert!(!blocks.is_empty());
487        // Check we have the outer block
488        let outer = blocks.iter().find(|(s, e)| content[*s..*e].contains("nested"));
489        assert!(outer.is_some());
490    }
491
492    #[test]
493    fn test_indented_code_with_blank_lines() {
494        // Indented code blocks can contain blank lines
495        let content = "Text\n\n    line1\n\n    line2\n\nAfter";
496        let blocks = CodeBlockUtils::detect_code_blocks(content);
497        // May have multiple blocks due to blank line handling
498        assert!(!blocks.is_empty());
499        // Check that we captured the indented code
500        let all_content: String = blocks
501            .iter()
502            .map(|(s, e)| &content[*s..*e])
503            .collect::<Vec<_>>()
504            .join("");
505        assert!(all_content.contains("line1") || content[blocks[0].0..blocks[0].1].contains("line1"));
506    }
507
508    #[test]
509    fn test_code_span_with_spaces() {
510        // Code spans should NOT be detected as code blocks
511        let content = "Text ` code with spaces ` more";
512        let blocks = CodeBlockUtils::detect_code_blocks(content);
513        assert_eq!(blocks.len(), 0); // No blocks, only inline span
514    }
515
516    #[test]
517    fn test_fenced_block_with_info_string() {
518        // Fenced code blocks with complex info strings
519        let content = "```rust,no_run,should_panic\ncode\n```";
520        let blocks = CodeBlockUtils::detect_code_blocks(content);
521        // 1 fenced block (fence markers no longer detected as inline spans)
522        assert_eq!(blocks.len(), 1);
523        assert_eq!(blocks[0].0, 0);
524    }
525
526    #[test]
527    fn test_indented_fences_not_code_blocks() {
528        // Indented fence markers should still work as fences
529        let content = "Text\n  ```\n  code\n  ```\nAfter";
530        let blocks = CodeBlockUtils::detect_code_blocks(content);
531        // Only 1 fenced block (indented fences still work)
532        assert_eq!(blocks.len(), 1);
533    }
534
535    // Issue #175: Backticks in info string invalidate the fence
536    #[test]
537    fn test_backticks_in_info_string_not_code_block() {
538        // Per CommonMark spec: "If the info string comes after a backtick fence,
539        // it may not contain any backtick characters."
540        // So ```something``` is NOT a valid fence - the backticks are treated as inline code.
541        // Verified with: echo '```something```' | npx commonmark
542        // Output: <p><code>something</code></p>
543        let content = "```something```\n\n```bash\n# comment\n```";
544        let blocks = CodeBlockUtils::detect_code_blocks(content);
545        // Should find only the valid ```bash block, NOT the invalid ```something```
546        assert_eq!(blocks.len(), 1);
547        // The valid block should contain "# comment"
548        assert!(content[blocks[0].0..blocks[0].1].contains("# comment"));
549    }
550
551    #[test]
552    fn test_issue_175_reproduction() {
553        // Full reproduction of issue #175
554        let content = "```something```\n\n```bash\n# Have a parrot\necho \"🦜\"\n```";
555        let blocks = CodeBlockUtils::detect_code_blocks(content);
556        // Only the bash block is a code block
557        assert_eq!(blocks.len(), 1);
558        assert!(content[blocks[0].0..blocks[0].1].contains("Have a parrot"));
559    }
560
561    #[test]
562    fn test_tilde_fence_allows_tildes_in_info_string() {
563        // Tilde fences CAN have tildes in info string (only backtick restriction exists)
564        // ~~~abc~~~ opens an unclosed code block with info string "abc~~~"
565        let content = "~~~abc~~~\ncode content\n~~~";
566        let blocks = CodeBlockUtils::detect_code_blocks(content);
567        // This is a valid tilde fence that opens and closes
568        assert_eq!(blocks.len(), 1);
569    }
570
571    #[test]
572    fn test_nested_longer_fence_contains_shorter() {
573        // Longer fence (````) can contain shorter fence (```) as content
574        let content = "````\n```\nnested content\n```\n````";
575        let blocks = CodeBlockUtils::detect_code_blocks(content);
576        assert_eq!(blocks.len(), 1);
577        assert!(content[blocks[0].0..blocks[0].1].contains("nested content"));
578    }
579
580    #[test]
581    fn test_mixed_fence_types() {
582        // Tilde fence contains backtick markers as content
583        let content = "~~~\n```\nmixed content\n~~~";
584        let blocks = CodeBlockUtils::detect_code_blocks(content);
585        assert_eq!(blocks.len(), 1);
586        assert!(content[blocks[0].0..blocks[0].1].contains("mixed content"));
587    }
588
589    #[test]
590    fn test_indented_code_in_list_issue_276() {
591        // Issue #276: Indented code block inside a list should be detected by pulldown-cmark
592        let content = r#"1. First item
5932. Second item with code:
594
595        # This is a code block in a list
596        print("Hello, world!")
597
5984. Third item"#;
599
600        let blocks = CodeBlockUtils::detect_code_blocks(content);
601        // pulldown-cmark SHOULD detect this indented code block inside the list
602        assert!(!blocks.is_empty(), "Should detect indented code block inside list");
603
604        // Verify the detected block contains our code
605        let all_content: String = blocks
606            .iter()
607            .map(|(s, e)| &content[*s..*e])
608            .collect::<Vec<_>>()
609            .join("");
610        assert!(
611            all_content.contains("code block in a list") || all_content.contains("print"),
612            "Detected block should contain the code content: {all_content:?}"
613        );
614    }
615
616    #[test]
617    fn test_detect_markdown_code_blocks() {
618        let content = r#"# Example
619
620```markdown
621# Heading
622Content here
623```
624
625```md
626Another heading
627More content
628```
629
630```rust
631// Not markdown
632fn main() {}
633```
634"#;
635
636        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
637
638        // Should detect 2 blocks (markdown and md, not rust)
639        assert_eq!(
640            blocks.len(),
641            2,
642            "Should detect exactly 2 markdown blocks, got {blocks:?}"
643        );
644
645        // First block should be the ```markdown block
646        let first = &blocks[0];
647        let first_content = &content[first.content_start..first.content_end];
648        assert!(
649            first_content.contains("# Heading"),
650            "First block should contain '# Heading', got: {first_content:?}"
651        );
652
653        // Second block should be the ```md block
654        let second = &blocks[1];
655        let second_content = &content[second.content_start..second.content_end];
656        assert!(
657            second_content.contains("Another heading"),
658            "Second block should contain 'Another heading', got: {second_content:?}"
659        );
660    }
661
662    #[test]
663    fn test_detect_markdown_code_blocks_empty() {
664        let content = "# Just a heading\n\nNo code blocks here\n";
665        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
666        assert_eq!(blocks.len(), 0);
667    }
668
669    #[test]
670    fn test_detect_markdown_code_blocks_case_insensitive() {
671        let content = "```MARKDOWN\nContent\n```\n";
672        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
673        assert_eq!(blocks.len(), 1);
674    }
675
676    #[test]
677    fn test_detect_markdown_code_blocks_at_eof_no_trailing_newline() {
678        // Block at end of file without trailing newline after closing fence
679        let content = "# Doc\n\n```markdown\nContent\n```";
680        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
681        assert_eq!(blocks.len(), 1);
682        // Content should be extractable without panic
683        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
684        assert!(block_content.contains("Content"));
685    }
686
687    #[test]
688    fn test_detect_markdown_code_blocks_single_line_content() {
689        // Single line of content, no extra newlines
690        let content = "```markdown\nX\n```\n";
691        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
692        assert_eq!(blocks.len(), 1);
693        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
694        assert_eq!(block_content, "X");
695    }
696
697    #[test]
698    fn test_detect_markdown_code_blocks_empty_content() {
699        // Block with no content between fences
700        let content = "```markdown\n```\n";
701        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
702        // Should detect block but with empty range or not at all
703        // Either behavior is acceptable as long as no panic
704        if !blocks.is_empty() {
705            // If detected, content range should be valid
706            assert!(blocks[0].content_start <= blocks[0].content_end);
707        }
708    }
709
710    #[test]
711    fn test_detect_markdown_code_blocks_validates_ranges() {
712        // Ensure no panic on various edge cases
713        let test_cases = [
714            "",                             // Empty content
715            "```markdown",                  // Unclosed block
716            "```markdown\n",                // Unclosed block with newline
717            "```\n```",                     // Non-markdown block
718            "```markdown\n```",             // Empty markdown block
719            "   ```markdown\n   X\n   ```", // Indented block
720        ];
721
722        for content in test_cases {
723            // Should not panic
724            let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
725            // All detected blocks should have valid ranges
726            for block in &blocks {
727                assert!(
728                    block.content_start <= block.content_end,
729                    "Invalid range in content: {content:?}"
730                );
731                assert!(
732                    block.content_end <= content.len(),
733                    "Range exceeds content length in: {content:?}"
734                );
735            }
736        }
737    }
738}