rumdl_lib/utils/
code_block_utils.rs

1//!
2//! Utility functions for detecting and handling code blocks and code spans in Markdown for rumdl.
3//!
4//! Code block detection is delegated to pulldown-cmark, which correctly implements the
5//! CommonMark specification. This handles edge cases like:
6//! - Backtick fences with backticks in the info string (invalid per spec)
7//! - Nested fences (longer fence contains shorter fence as content)
8//! - Mixed fence types (tilde fence contains backticks as content)
9//! - Indented code blocks with proper list context handling
10
11use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
12
/// How a code block relates to the list content around it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum CodeBlockContext {
    /// The block is not indented enough to continue a list and has at least one
    /// blank line directly before or after it, so it separates lists.
    Standalone,
    /// The block is indented at least as far as the list continuation column,
    /// so it belongs to the list item.
    Indented,
    /// The block sits directly next to other content without blank lines; this is
    /// the conservative fallback and is treated as non-breaking.
    Adjacent,
}
23
/// Namespace type for code block detection and list-context helpers in Markdown.
///
/// The struct carries no state; all functionality is exposed as associated functions.
pub struct CodeBlockUtils;
26
27impl CodeBlockUtils {
28    /// Detect all code blocks in the content (NOT including inline code spans)
29    ///
30    /// Uses pulldown-cmark for spec-compliant CommonMark parsing. This correctly handles:
31    /// - Fenced code blocks (``` and ~~~)
32    /// - Indented code blocks (4 spaces or tab)
33    /// - Code blocks inside lists, blockquotes, and other containers
34    /// - Edge cases like backticks in info strings (which invalidate the fence)
35    ///
36    /// Returns a sorted vector of (start, end) byte offset tuples.
37    pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
38        let mut blocks = Vec::new();
39        let mut code_block_start: Option<usize> = None;
40
41        // Use pulldown-cmark with all extensions for maximum compatibility
42        let options = Options::all();
43        let parser = Parser::new_ext(content, options).into_offset_iter();
44
45        for (event, range) in parser {
46            match event {
47                Event::Start(Tag::CodeBlock(_)) => {
48                    // Record start position of code block
49                    code_block_start = Some(range.start);
50                }
51                Event::End(TagEnd::CodeBlock) => {
52                    // Complete the code block range
53                    if let Some(start) = code_block_start.take() {
54                        blocks.push((start, range.end));
55                    }
56                }
57                _ => {}
58            }
59        }
60
61        // Handle edge case: unclosed code block at end of content
62        // pulldown-cmark should handle this, but be defensive
63        if let Some(start) = code_block_start {
64            blocks.push((start, content.len()));
65        }
66
67        // Sort by start position (should already be sorted, but ensure consistency)
68        blocks.sort_by_key(|&(start, _)| start);
69        blocks
70    }
71
72    /// Check if a position is within a code block (for compatibility)
73    pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
74        // This is a compatibility function - it only checks code blocks now, not spans
75        blocks.iter().any(|&(start, end)| pos >= start && pos < end)
76    }
77
78    /// Check if a position is within a code block (NOT including inline code spans)
79    pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
80        blocks.iter().any(|&(start, end)| pos >= start && pos < end)
81    }
82
83    /// Analyze code block context relative to list parsing
84    /// This is the core function implementing Design #3's three-tier classification
85    pub fn analyze_code_block_context(
86        lines: &[crate::lint_context::LineInfo],
87        line_idx: usize,
88        min_continuation_indent: usize,
89    ) -> CodeBlockContext {
90        if let Some(line_info) = lines.get(line_idx) {
91            // Rule 1: Indentation Analysis - Is it sufficiently indented for list continuation?
92            if line_info.indent >= min_continuation_indent {
93                return CodeBlockContext::Indented;
94            }
95
96            // Rule 2: Blank Line Context - Check for structural separation indicators
97            let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
98
99            // Rule 3: Standalone Detection - Insufficient indentation + blank line separation
100            // This is the key fix: root-level code blocks with blank lines separate lists
101            if prev_blanks > 0 || next_blanks > 0 {
102                return CodeBlockContext::Standalone;
103            }
104
105            // Rule 4: Default - Adjacent (conservative, non-breaking for edge cases)
106            CodeBlockContext::Adjacent
107        } else {
108            // Fallback for invalid line index
109            CodeBlockContext::Adjacent
110        }
111    }
112
113    /// Count blank lines before and after the given line index
114    fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
115        let mut prev_blanks = 0;
116        let mut next_blanks = 0;
117
118        // Count blank lines before (look backwards)
119        for i in (0..line_idx).rev() {
120            if let Some(line) = lines.get(i) {
121                if line.is_blank {
122                    prev_blanks += 1;
123                } else {
124                    break;
125                }
126            } else {
127                break;
128            }
129        }
130
131        // Count blank lines after (look forwards)
132        for i in (line_idx + 1)..lines.len() {
133            if let Some(line) = lines.get(i) {
134                if line.is_blank {
135                    next_blanks += 1;
136                } else {
137                    break;
138                }
139            } else {
140                break;
141            }
142        }
143
144        (prev_blanks, next_blanks)
145    }
146
147    /// Calculate minimum indentation required for code block to continue a list
148    /// Based on the most recent list item's marker width
149    pub fn calculate_min_continuation_indent(
150        content: &str,
151        lines: &[crate::lint_context::LineInfo],
152        current_line_idx: usize,
153    ) -> usize {
154        // Look backwards to find the most recent list item
155        for i in (0..current_line_idx).rev() {
156            if let Some(line_info) = lines.get(i) {
157                if let Some(list_item) = &line_info.list_item {
158                    // Calculate minimum continuation indent for this list item
159                    return if list_item.is_ordered {
160                        list_item.marker_column + list_item.marker.len() + 1 // +1 for space after marker
161                    } else {
162                        list_item.marker_column + 2 // Unordered lists need marker + space (min 2)
163                    };
164                }
165
166                // Stop at structural separators that would break list context
167                if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
168                    break;
169                }
170            }
171        }
172
173        0 // No list context found
174    }
175
176    /// Check if content is a structural separator (headings, horizontal rules, etc.)
177    fn is_structural_separator(content: &str) -> bool {
178        let trimmed = content.trim();
179        trimmed.starts_with("---")
180            || trimmed.starts_with("***")
181            || trimmed.starts_with("___")
182            || crate::utils::skip_context::is_table_line(trimmed)
183            || trimmed.starts_with(">") // Blockquotes
184    }
185
186    /// Detect fenced code blocks with markdown/md language tag.
187    ///
188    /// Returns a vector of `MarkdownCodeBlock` containing byte ranges for the
189    /// content between the fences (excluding the fence lines themselves).
190    ///
191    /// Only detects fenced code blocks (``` or ~~~), not indented code blocks,
192    /// since indented blocks don't have a language tag.
193    pub fn detect_markdown_code_blocks(content: &str) -> Vec<MarkdownCodeBlock> {
194        use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
195
196        let mut blocks = Vec::new();
197        let mut current_block: Option<MarkdownCodeBlockBuilder> = None;
198
199        let options = Options::all();
200        let parser = Parser::new_ext(content, options).into_offset_iter();
201
202        for (event, range) in parser {
203            match event {
204                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
205                    // Check if language is markdown or md (first word of info string)
206                    let language = info.split_whitespace().next().unwrap_or("");
207                    if language.eq_ignore_ascii_case("markdown") || language.eq_ignore_ascii_case("md") {
208                        // Find where content starts (after the opening fence line)
209                        let block_start = range.start;
210                        let content_start = content[block_start..]
211                            .find('\n')
212                            .map(|i| block_start + i + 1)
213                            .unwrap_or(content.len());
214
215                        current_block = Some(MarkdownCodeBlockBuilder { content_start });
216                    }
217                }
218                Event::End(TagEnd::CodeBlock) => {
219                    if let Some(builder) = current_block.take() {
220                        // Find where content ends (before the closing fence line)
221                        let block_end = range.end;
222
223                        // Validate range before slicing
224                        if builder.content_start > block_end || builder.content_start > content.len() {
225                            continue;
226                        }
227
228                        let search_range = &content[builder.content_start..block_end.min(content.len())];
229                        let content_end = search_range
230                            .rfind('\n')
231                            .map(|i| builder.content_start + i)
232                            .unwrap_or(builder.content_start);
233
234                        // Only add block if it has valid content range
235                        if content_end >= builder.content_start {
236                            blocks.push(MarkdownCodeBlock {
237                                content_start: builder.content_start,
238                                content_end,
239                            });
240                        }
241                    }
242                }
243                _ => {}
244            }
245        }
246
247        blocks
248    }
249}
250
/// Byte range of the content of a markdown-tagged fenced code block, used for
/// recursive formatting.
#[derive(Clone, Debug)]
pub struct MarkdownCodeBlock {
    /// Byte offset of the first content byte (just after the opening fence line).
    pub content_start: usize,
    /// Byte offset one past the last content byte (just before the closing fence line).
    pub content_end: usize,
}
259
/// Builder for MarkdownCodeBlock during parsing.
///
/// Holds the state recorded at the start event of a markdown-tagged fenced block
/// until the matching end event is seen in `detect_markdown_code_blocks`.
struct MarkdownCodeBlockBuilder {
    /// Byte offset just past the newline that ends the opening fence line, or
    /// `content.len()` when the opening fence line has no trailing newline.
    content_start: usize,
}
264
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_detect_fenced_code_blocks() {
        // `detect_code_blocks` reports block-level code only; inline code spans are
        // never part of the result, so each fenced block below counts exactly once.

        // Basic fenced code block with backticks
        let content = "Some text\n```\ncode here\n```\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Should find exactly 1 fenced block
        assert_eq!(blocks.len(), 1);

        // Check that we have the fenced block
        let fenced_block = blocks
            .iter()
            .find(|(start, end)| end - start > 10 && content[*start..*end].contains("code here"));
        assert!(fenced_block.is_some());

        // Fenced code block with tildes; the range spans both fence lines
        let content = "Some text\n~~~\ncode here\n~~~\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(&content[blocks[0].0..blocks[0].1], "~~~\ncode here\n~~~");

        // Multiple code blocks
        let content = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // 2 fenced blocks, one per fence pair
        assert_eq!(blocks.len(), 2);
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        // Code block with language identifier
        let content = "Text\n```rust\nfn main() {}\n```\nMore";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Exactly 1 fenced block
        assert_eq!(blocks.len(), 1);
        // Check we have the full fenced block
        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("fn main"));
        assert!(fenced.is_some());
    }

    #[test]
    fn test_unclosed_code_block() {
        // Unclosed code block should extend to end of content
        let content = "Text\n```\ncode here\nno closing fence";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].1, content.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        // Basic indented code block
        let content = "Paragraph\n\n    code line 1\n    code line 2\n\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("code line 1"));
        assert!(content[blocks[0].0..blocks[0].1].contains("code line 2"));

        // Indented code with tabs
        let content = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        // Indented lines without preceding blank line are not code blocks
        let content = "Paragraph\n    indented but not code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // With blank line, it becomes a code block
        let content = "Paragraph\n\n    now it's code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_content_with_list_markers_is_code_block() {
        // Per CommonMark spec: 4-space indented content after blank line IS a code block,
        // even if the content looks like list markers. The indentation takes precedence.
        // Verified with: echo 'List:\n\n    - Item 1' | npx commonmark
        // Output: <pre><code>- Item 1</code></pre>
        let content = "List:\n\n    - Item 1\n    - Item 2\n    * Item 3\n    + Item 4";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1); // This IS a code block per spec

        // Same for numbered list markers
        let content = "List:\n\n    1. First\n    2. Second";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1); // This IS a code block per spec
    }

    #[test]
    fn test_actual_list_items_not_code_blocks() {
        // Actual list items (no preceding blank line + 4 spaces) are NOT code blocks
        let content = "- Item 1\n- Item 2\n* Item 3";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // Nested list items
        let content = "- Item 1\n  - Nested item\n- Item 2";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_inline_code_spans_not_detected() {
        // Inline code spans should NOT be detected as code blocks
        let content = "Text with `inline code` here";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0); // No blocks, only inline spans

        // Multiple backtick code span
        let content = "Text with ``code with ` backtick`` here";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0); // No blocks, only inline spans

        // Multiple code spans
        let content = "Has `code1` and `code2` spans";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0); // No blocks, only inline spans
    }

    #[test]
    fn test_unclosed_code_span() {
        // Unclosed code span should not be detected
        let content = "Text with `unclosed code span";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // Mismatched backticks
        let content = "Text with ``one style` different close";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Should only detect the fenced block, NOT the inline spans
        assert_eq!(blocks.len(), 1);

        // Check we have the fenced block only
        assert!(blocks.iter().any(|(s, e)| content[*s..*e].contains("block")));
        // Should NOT detect inline spans
        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span1`"));
        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span2`"));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let blocks = vec![(10, 20), (30, 40), (50, 60)];

        // Test positions inside blocks
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 15));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 35));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 55));

        // Test positions at boundaries
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 10)); // Start is inclusive
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 20)); // End is exclusive

        // Test positions outside blocks
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 5));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 25));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 65));
    }

    #[test]
    fn test_empty_content() {
        let blocks = CodeBlockUtils::detect_code_blocks("");
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_code_block_at_start() {
        let content = "```\ncode\n```\nText after";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Exactly 1 fenced block
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0); // Fenced block starts at 0
    }

    #[test]
    fn test_code_block_at_end() {
        let content = "Text before\n```\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Exactly 1 fenced block
        assert_eq!(blocks.len(), 1);
        // Check we have the fenced block
        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("code"));
        assert!(fenced.is_some());
    }

    #[test]
    fn test_nested_fence_markers() {
        // Code block containing fence markers as content
        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // The inner ``` lines are content of the outer ```` block. This test only
        // requires that something is detected; the exact count for this shape is
        // pinned by test_nested_longer_fence_contains_shorter.
        assert!(!blocks.is_empty());
        // Check we have the outer block
        let outer = blocks.iter().find(|(s, e)| content[*s..*e].contains("nested"));
        assert!(outer.is_some());
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        // Indented code blocks can contain blank lines
        let content = "Text\n\n    line1\n\n    line2\n\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // May have multiple blocks due to blank line handling
        assert!(!blocks.is_empty());
        // Check that we captured the indented code
        let all_content: String = blocks
            .iter()
            .map(|(s, e)| &content[*s..*e])
            .collect::<Vec<_>>()
            .join("");
        assert!(all_content.contains("line1") || content[blocks[0].0..blocks[0].1].contains("line1"));
    }

    #[test]
    fn test_code_span_with_spaces() {
        // Code spans should NOT be detected as code blocks
        let content = "Text ` code with spaces ` more";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0); // No blocks, only inline span
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        // Fenced code blocks with complex info strings
        let content = "```rust,no_run,should_panic\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Exactly 1 fenced block
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    // NOTE: despite the test name, this asserts that fences indented by two spaces
    // ARE still recognized as a fenced code block.
    #[test]
    fn test_indented_fences_not_code_blocks() {
        // Indented fence markers should still work as fences
        let content = "Text\n  ```\n  code\n  ```\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Only 1 fenced block (indented fences still work)
        assert_eq!(blocks.len(), 1);
    }

    // Issue #175: Backticks in info string invalidate the fence
    #[test]
    fn test_backticks_in_info_string_not_code_block() {
        // Per CommonMark spec: "If the info string comes after a backtick fence,
        // it may not contain any backtick characters."
        // So ```something``` is NOT a valid fence - the backticks are treated as inline code.
        // Verified with: echo '```something```' | npx commonmark
        // Output: <p><code>something</code></p>
        let content = "```something```\n\n```bash\n# comment\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Should find only the valid ```bash block, NOT the invalid ```something```
        assert_eq!(blocks.len(), 1);
        // The valid block should contain "# comment"
        assert!(content[blocks[0].0..blocks[0].1].contains("# comment"));
    }

    #[test]
    fn test_issue_175_reproduction() {
        // Full reproduction of issue #175
        let content = "```something```\n\n```bash\n# Have a parrot\necho \"🦜\"\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Only the bash block is a code block
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("Have a parrot"));
    }

    #[test]
    fn test_tilde_fence_allows_tildes_in_info_string() {
        // Tilde fences CAN have tildes in info string (only backtick restriction exists)
        // ~~~abc~~~ opens a code block with info string "abc~~~"; the final ~~~ line closes it
        let content = "~~~abc~~~\ncode content\n~~~";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // This is a valid tilde fence that opens and closes
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_nested_longer_fence_contains_shorter() {
        // Longer fence (````) can contain shorter fence (```) as content
        let content = "````\n```\nnested content\n```\n````";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("nested content"));
    }

    #[test]
    fn test_mixed_fence_types() {
        // Tilde fence contains backtick markers as content
        let content = "~~~\n```\nmixed content\n~~~";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(content[blocks[0].0..blocks[0].1].contains("mixed content"));
    }

    #[test]
    fn test_indented_code_in_list_issue_276() {
        // Issue #276: Indented code block inside a list should be detected by pulldown-cmark
        let content = r#"1. First item
2. Second item with code:

        # This is a code block in a list
        print("Hello, world!")

4. Third item"#;

        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // pulldown-cmark SHOULD detect this indented code block inside the list
        assert!(!blocks.is_empty(), "Should detect indented code block inside list");

        // Verify the detected block contains our code
        let all_content: String = blocks
            .iter()
            .map(|(s, e)| &content[*s..*e])
            .collect::<Vec<_>>()
            .join("");
        assert!(
            all_content.contains("code block in a list") || all_content.contains("print"),
            "Detected block should contain the code content: {all_content:?}"
        );
    }

    #[test]
    fn test_detect_markdown_code_blocks() {
        let content = r#"# Example

```markdown
# Heading
Content here
```

```md
Another heading
More content
```

```rust
// Not markdown
fn main() {}
```
"#;

        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);

        // Should detect 2 blocks (markdown and md, not rust)
        assert_eq!(
            blocks.len(),
            2,
            "Should detect exactly 2 markdown blocks, got {blocks:?}"
        );

        // First block should be the ```markdown block
        let first = &blocks[0];
        let first_content = &content[first.content_start..first.content_end];
        assert!(
            first_content.contains("# Heading"),
            "First block should contain '# Heading', got: {first_content:?}"
        );

        // Second block should be the ```md block
        let second = &blocks[1];
        let second_content = &content[second.content_start..second.content_end];
        assert!(
            second_content.contains("Another heading"),
            "Second block should contain 'Another heading', got: {second_content:?}"
        );
    }

    #[test]
    fn test_detect_markdown_code_blocks_empty() {
        let content = "# Just a heading\n\nNo code blocks here\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_detect_markdown_code_blocks_case_insensitive() {
        // The language tag comparison ignores ASCII case
        let content = "```MARKDOWN\nContent\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_detect_markdown_code_blocks_at_eof_no_trailing_newline() {
        // Block at end of file without trailing newline after closing fence
        let content = "# Doc\n\n```markdown\nContent\n```";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        // Content should be extractable without panic
        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
        assert!(block_content.contains("Content"));
    }

    #[test]
    fn test_detect_markdown_code_blocks_single_line_content() {
        // Single line of content, no extra newlines; the range excludes both fence
        // lines and the newline before the closing fence
        let content = "```markdown\nX\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
        assert_eq!(block_content, "X");
    }

    #[test]
    fn test_detect_markdown_code_blocks_empty_content() {
        // Block with no content between fences
        let content = "```markdown\n```\n";
        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
        // Should detect block but with empty range or not at all
        // Either behavior is acceptable as long as no panic
        if !blocks.is_empty() {
            // If detected, content range should be valid
            assert!(blocks[0].content_start <= blocks[0].content_end);
        }
    }

    #[test]
    fn test_detect_markdown_code_blocks_validates_ranges() {
        // Ensure no panic on various edge cases
        let test_cases = [
            "",                             // Empty content
            "```markdown",                  // Unclosed block
            "```markdown\n",                // Unclosed block with newline
            "```\n```",                     // Non-markdown block
            "```markdown\n```",             // Empty markdown block
            "   ```markdown\n   X\n   ```", // Indented block
        ];

        for content in test_cases {
            // Should not panic
            let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
            // All detected blocks should have valid ranges
            for block in &blocks {
                assert!(
                    block.content_start <= block.content_end,
                    "Invalid range in content: {content:?}"
                );
                assert!(
                    block.content_end <= content.len(),
                    "Range exceeds content length in: {content:?}"
                );
            }
        }
    }
}