Skip to main content

rumdl_lib/utils/
code_block_utils.rs

//!
//! Utility functions for detecting and handling code blocks and code spans in Markdown for rumdl.
//!
//! Code block detection is delegated to pulldown-cmark, which correctly implements the
//! CommonMark specification. This handles edge cases like:
//! - Backtick fences with backticks in the info string (invalid per spec)
//! - Nested fences (longer fence contains shorter fence as content)
//! - Mixed fence types (tilde fence contains backticks as content)
//! - Indented code blocks with proper list context handling
10
11use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
12
/// Byte ranges produced by a single parse, in the order `(code_blocks, code_spans)`.
///
/// Every element of either vector is a `(start, end)` pair of byte offsets into the
/// content that was parsed.
pub type CodeRanges = (Vec<(usize, usize)>, Vec<(usize, usize)>);
15
/// Classification of a code block relative to the surrounding list context.
///
/// Produced by `CodeBlockUtils::analyze_code_block_context`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CodeBlockContext {
    /// The block is indented less than the list continuation indent and has at least one
    /// blank line directly before or after it, so it separates lists.
    Standalone,
    /// The block is indented at least as far as the list continuation indent, so it
    /// continues the list.
    Indented,
    /// Neither of the above: the block sits directly against list content. This is the
    /// conservative, non-breaking default and also the fallback for an invalid line index.
    Adjacent,
}
26
/// Namespace type for the Markdown code block helpers.
///
/// It carries no data; every helper is an associated function called as
/// `CodeBlockUtils::name(...)`.
pub struct CodeBlockUtils;
29
30impl CodeBlockUtils {
31    /// Detect all code blocks in the content (NOT including inline code spans)
32    ///
33    /// Uses pulldown-cmark for spec-compliant CommonMark parsing. This correctly handles:
34    /// - Fenced code blocks (``` and ~~~)
35    /// - Indented code blocks (4 spaces or tab)
36    /// - Code blocks inside lists, blockquotes, and other containers
37    /// - Edge cases like backticks in info strings (which invalidate the fence)
38    ///
39    /// Returns a sorted vector of (start, end) byte offset tuples.
40    pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
41        let (blocks, _) = Self::detect_code_blocks_and_spans(content);
42        blocks
43    }
44
45    /// Returns code block ranges and inline code span ranges in a single pulldown-cmark pass.
46    pub fn detect_code_blocks_and_spans(content: &str) -> CodeRanges {
47        let mut blocks = Vec::new();
48        let mut spans = Vec::new();
49        let mut code_block_start: Option<usize> = None;
50
51        // Use pulldown-cmark with all extensions for maximum compatibility
52        let options = Options::all();
53        let parser = Parser::new_ext(content, options).into_offset_iter();
54
55        for (event, range) in parser {
56            match event {
57                Event::Start(Tag::CodeBlock(_)) => {
58                    // Record start position of code block
59                    code_block_start = Some(range.start);
60                }
61                Event::End(TagEnd::CodeBlock) => {
62                    // Complete the code block range
63                    if let Some(start) = code_block_start.take() {
64                        blocks.push((start, range.end));
65                    }
66                }
67                Event::Code(_) => {
68                    spans.push((range.start, range.end));
69                }
70                _ => {}
71            }
72        }
73
74        // Handle edge case: unclosed code block at end of content
75        // pulldown-cmark should handle this, but be defensive
76        if let Some(start) = code_block_start {
77            blocks.push((start, content.len()));
78        }
79
80        // Sort by start position (should already be sorted, but ensure consistency)
81        blocks.sort_by_key(|&(start, _)| start);
82        (blocks, spans)
83    }
84
85    /// Check if a position is within a code block (for compatibility)
86    pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
87        Self::is_in_code_block(blocks, pos)
88    }
89
90    /// Check if a byte position falls within any of the given sorted, non-overlapping ranges.
91    ///
92    /// Uses binary search on the sorted block ranges for O(log n) lookup.
93    /// The blocks slice must be sorted by start position (as returned by
94    /// `detect_code_blocks` and `detect_code_blocks_and_spans`).
95    pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
96        // Binary search: find the last block whose start <= pos
97        let idx = blocks.partition_point(|&(start, _)| start <= pos);
98        // partition_point returns the first index where start > pos,
99        // so the candidate is at idx - 1
100        idx > 0 && pos < blocks[idx - 1].1
101    }
102
103    /// Analyze code block context relative to list parsing
104    /// This is the core function implementing Design #3's three-tier classification
105    pub fn analyze_code_block_context(
106        lines: &[crate::lint_context::LineInfo],
107        line_idx: usize,
108        min_continuation_indent: usize,
109    ) -> CodeBlockContext {
110        if let Some(line_info) = lines.get(line_idx) {
111            // Rule 1: Indentation Analysis - Is it sufficiently indented for list continuation?
112            if line_info.indent >= min_continuation_indent {
113                return CodeBlockContext::Indented;
114            }
115
116            // Rule 2: Blank Line Context - Check for structural separation indicators
117            let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
118
119            // Rule 3: Standalone Detection - Insufficient indentation + blank line separation
120            // This is the key fix: root-level code blocks with blank lines separate lists
121            if prev_blanks > 0 || next_blanks > 0 {
122                return CodeBlockContext::Standalone;
123            }
124
125            // Rule 4: Default - Adjacent (conservative, non-breaking for edge cases)
126            CodeBlockContext::Adjacent
127        } else {
128            // Fallback for invalid line index
129            CodeBlockContext::Adjacent
130        }
131    }
132
133    /// Count blank lines before and after the given line index
134    fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
135        let mut prev_blanks = 0;
136        let mut next_blanks = 0;
137
138        // Count blank lines before (look backwards)
139        for i in (0..line_idx).rev() {
140            if let Some(line) = lines.get(i) {
141                if line.is_blank {
142                    prev_blanks += 1;
143                } else {
144                    break;
145                }
146            } else {
147                break;
148            }
149        }
150
151        // Count blank lines after (look forwards)
152        for i in (line_idx + 1)..lines.len() {
153            if let Some(line) = lines.get(i) {
154                if line.is_blank {
155                    next_blanks += 1;
156                } else {
157                    break;
158                }
159            } else {
160                break;
161            }
162        }
163
164        (prev_blanks, next_blanks)
165    }
166
167    /// Calculate minimum indentation required for code block to continue a list
168    /// Based on the most recent list item's marker width
169    pub fn calculate_min_continuation_indent(
170        content: &str,
171        lines: &[crate::lint_context::LineInfo],
172        current_line_idx: usize,
173    ) -> usize {
174        // Look backwards to find the most recent list item
175        for i in (0..current_line_idx).rev() {
176            if let Some(line_info) = lines.get(i) {
177                if let Some(list_item) = &line_info.list_item {
178                    // Calculate minimum continuation indent for this list item
179                    return if list_item.is_ordered {
180                        list_item.marker_column + list_item.marker.len() + 1 // +1 for space after marker
181                    } else {
182                        list_item.marker_column + 2 // Unordered lists need marker + space (min 2)
183                    };
184                }
185
186                // Stop at structural separators that would break list context
187                if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
188                    break;
189                }
190            }
191        }
192
193        0 // No list context found
194    }
195
196    /// Check if content is a structural separator (headings, horizontal rules, etc.)
197    fn is_structural_separator(content: &str) -> bool {
198        let trimmed = content.trim();
199        trimmed.starts_with("---")
200            || trimmed.starts_with("***")
201            || trimmed.starts_with("___")
202            || crate::utils::skip_context::is_table_line(trimmed)
203            || trimmed.starts_with(">") // Blockquotes
204    }
205
206    /// Detect fenced code blocks with markdown/md language tag.
207    ///
208    /// Returns a vector of `MarkdownCodeBlock` containing byte ranges for the
209    /// content between the fences (excluding the fence lines themselves).
210    ///
211    /// Only detects fenced code blocks (``` or ~~~), not indented code blocks,
212    /// since indented blocks don't have a language tag.
213    pub fn detect_markdown_code_blocks(content: &str) -> Vec<MarkdownCodeBlock> {
214        use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
215
216        let mut blocks = Vec::new();
217        let mut current_block: Option<MarkdownCodeBlockBuilder> = None;
218
219        let options = Options::all();
220        let parser = Parser::new_ext(content, options).into_offset_iter();
221
222        for (event, range) in parser {
223            match event {
224                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
225                    // Check if language is markdown or md (first word of info string)
226                    let language = info.split_whitespace().next().unwrap_or("");
227                    if language.eq_ignore_ascii_case("markdown") || language.eq_ignore_ascii_case("md") {
228                        // Find where content starts (after the opening fence line)
229                        let block_start = range.start;
230                        let content_start = content[block_start..]
231                            .find('\n')
232                            .map(|i| block_start + i + 1)
233                            .unwrap_or(content.len());
234
235                        current_block = Some(MarkdownCodeBlockBuilder { content_start });
236                    }
237                }
238                Event::End(TagEnd::CodeBlock) => {
239                    if let Some(builder) = current_block.take() {
240                        // Find where content ends (before the closing fence line)
241                        let block_end = range.end;
242
243                        // Validate range before slicing
244                        if builder.content_start > block_end || builder.content_start > content.len() {
245                            continue;
246                        }
247
248                        let search_range = &content[builder.content_start..block_end.min(content.len())];
249                        let content_end = search_range
250                            .rfind('\n')
251                            .map(|i| builder.content_start + i)
252                            .unwrap_or(builder.content_start);
253
254                        // Only add block if it has valid content range
255                        if content_end >= builder.content_start {
256                            blocks.push(MarkdownCodeBlock {
257                                content_start: builder.content_start,
258                                content_end,
259                            });
260                        }
261                    }
262                }
263                _ => {}
264            }
265        }
266
267        blocks
268    }
269}
270
/// Information about a markdown code block for recursive formatting.
///
/// `content_start..content_end` is the byte range of the text between the fence lines;
/// the range is empty when the block has no content.
#[derive(Debug, Clone)]
pub struct MarkdownCodeBlock {
    /// Byte offset where the content starts (after opening fence line)
    pub content_start: usize,
    /// Byte offset where the content ends (before closing fence line)
    pub content_end: usize,
}
279
/// Partially built `MarkdownCodeBlock`, held while the parser is inside the block.
struct MarkdownCodeBlockBuilder {
    /// Byte offset where the content starts (after opening fence line)
    content_start: usize,
}
284
285#[cfg(test)]
286mod tests {
287    use super::*;
288
289    #[test]
290    fn test_detect_fenced_code_blocks() {
291        // The function detects fenced blocks and inline code spans
292        // Fence markers (``` at line start) are now skipped in inline span detection
293
294        // Basic fenced code block with backticks
295        let content = "Some text\n```\ncode here\n```\nMore text";
296        let blocks = CodeBlockUtils::detect_code_blocks(content);
297        // Should find: 1 fenced block (fences are no longer detected as inline spans)
298        assert_eq!(blocks.len(), 1);
299
300        // Check that we have the fenced block
301        let fenced_block = blocks
302            .iter()
303            .find(|(start, end)| end - start > 10 && content[*start..*end].contains("code here"));
304        assert!(fenced_block.is_some());
305
306        // Fenced code block with tildes (no inline code detection for ~)
307        let content = "Some text\n~~~\ncode here\n~~~\nMore text";
308        let blocks = CodeBlockUtils::detect_code_blocks(content);
309        assert_eq!(blocks.len(), 1);
310        assert_eq!(&content[blocks[0].0..blocks[0].1], "~~~\ncode here\n~~~");
311
312        // Multiple code blocks
313        let content = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
314        let blocks = CodeBlockUtils::detect_code_blocks(content);
315        // 2 fenced blocks (fence markers no longer detected as inline spans)
316        assert_eq!(blocks.len(), 2);
317    }
318
319    #[test]
320    fn test_detect_code_blocks_with_language() {
321        // Code block with language identifier
322        let content = "Text\n```rust\nfn main() {}\n```\nMore";
323        let blocks = CodeBlockUtils::detect_code_blocks(content);
324        // 1 fenced block (fence markers no longer detected as inline spans)
325        assert_eq!(blocks.len(), 1);
326        // Check we have the full fenced block
327        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("fn main"));
328        assert!(fenced.is_some());
329    }
330
331    #[test]
332    fn test_unclosed_code_block() {
333        // Unclosed code block should extend to end of content
334        let content = "Text\n```\ncode here\nno closing fence";
335        let blocks = CodeBlockUtils::detect_code_blocks(content);
336        assert_eq!(blocks.len(), 1);
337        assert_eq!(blocks[0].1, content.len());
338    }
339
340    #[test]
341    fn test_indented_code_blocks() {
342        // Basic indented code block
343        let content = "Paragraph\n\n    code line 1\n    code line 2\n\nMore text";
344        let blocks = CodeBlockUtils::detect_code_blocks(content);
345        assert_eq!(blocks.len(), 1);
346        assert!(content[blocks[0].0..blocks[0].1].contains("code line 1"));
347        assert!(content[blocks[0].0..blocks[0].1].contains("code line 2"));
348
349        // Indented code with tabs
350        let content = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
351        let blocks = CodeBlockUtils::detect_code_blocks(content);
352        assert_eq!(blocks.len(), 1);
353    }
354
355    #[test]
356    fn test_indented_code_requires_blank_line() {
357        // Indented lines without preceding blank line are not code blocks
358        let content = "Paragraph\n    indented but not code\nMore text";
359        let blocks = CodeBlockUtils::detect_code_blocks(content);
360        assert_eq!(blocks.len(), 0);
361
362        // With blank line, it becomes a code block
363        let content = "Paragraph\n\n    now it's code\nMore text";
364        let blocks = CodeBlockUtils::detect_code_blocks(content);
365        assert_eq!(blocks.len(), 1);
366    }
367
368    #[test]
369    fn test_indented_content_with_list_markers_is_code_block() {
370        // Per CommonMark spec: 4-space indented content after blank line IS a code block,
371        // even if the content looks like list markers. The indentation takes precedence.
372        // Verified with: echo 'List:\n\n    - Item 1' | npx commonmark
373        // Output: <pre><code>- Item 1</code></pre>
374        let content = "List:\n\n    - Item 1\n    - Item 2\n    * Item 3\n    + Item 4";
375        let blocks = CodeBlockUtils::detect_code_blocks(content);
376        assert_eq!(blocks.len(), 1); // This IS a code block per spec
377
378        // Same for numbered list markers
379        let content = "List:\n\n    1. First\n    2. Second";
380        let blocks = CodeBlockUtils::detect_code_blocks(content);
381        assert_eq!(blocks.len(), 1); // This IS a code block per spec
382    }
383
384    #[test]
385    fn test_actual_list_items_not_code_blocks() {
386        // Actual list items (no preceding blank line + 4 spaces) are NOT code blocks
387        let content = "- Item 1\n- Item 2\n* Item 3";
388        let blocks = CodeBlockUtils::detect_code_blocks(content);
389        assert_eq!(blocks.len(), 0);
390
391        // Nested list items
392        let content = "- Item 1\n  - Nested item\n- Item 2";
393        let blocks = CodeBlockUtils::detect_code_blocks(content);
394        assert_eq!(blocks.len(), 0);
395    }
396
397    #[test]
398    fn test_inline_code_spans_not_detected() {
399        // Inline code spans should NOT be detected as code blocks
400        let content = "Text with `inline code` here";
401        let blocks = CodeBlockUtils::detect_code_blocks(content);
402        assert_eq!(blocks.len(), 0); // No blocks, only inline spans
403
404        // Multiple backtick code span
405        let content = "Text with ``code with ` backtick`` here";
406        let blocks = CodeBlockUtils::detect_code_blocks(content);
407        assert_eq!(blocks.len(), 0); // No blocks, only inline spans
408
409        // Multiple code spans
410        let content = "Has `code1` and `code2` spans";
411        let blocks = CodeBlockUtils::detect_code_blocks(content);
412        assert_eq!(blocks.len(), 0); // No blocks, only inline spans
413    }
414
415    #[test]
416    fn test_unclosed_code_span() {
417        // Unclosed code span should not be detected
418        let content = "Text with `unclosed code span";
419        let blocks = CodeBlockUtils::detect_code_blocks(content);
420        assert_eq!(blocks.len(), 0);
421
422        // Mismatched backticks
423        let content = "Text with ``one style` different close";
424        let blocks = CodeBlockUtils::detect_code_blocks(content);
425        assert_eq!(blocks.len(), 0);
426    }
427
428    #[test]
429    fn test_mixed_code_blocks_and_spans() {
430        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
431        let blocks = CodeBlockUtils::detect_code_blocks(content);
432        // Should only detect the fenced block, NOT the inline spans
433        assert_eq!(blocks.len(), 1);
434
435        // Check we have the fenced block only
436        assert!(blocks.iter().any(|(s, e)| content[*s..*e].contains("block")));
437        // Should NOT detect inline spans
438        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span1`"));
439        assert!(!blocks.iter().any(|(s, e)| &content[*s..*e] == "`span2`"));
440    }
441
442    #[test]
443    fn test_is_in_code_block_or_span() {
444        let blocks = vec![(10, 20), (30, 40), (50, 60)];
445
446        // Test positions inside blocks
447        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 15));
448        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 35));
449        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 55));
450
451        // Test positions at boundaries
452        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 10)); // Start is inclusive
453        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 20)); // End is exclusive
454
455        // Test positions outside blocks
456        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 5));
457        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 25));
458        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 65));
459    }
460
461    #[test]
462    fn test_empty_content() {
463        let blocks = CodeBlockUtils::detect_code_blocks("");
464        assert_eq!(blocks.len(), 0);
465    }
466
467    #[test]
468    fn test_code_block_at_start() {
469        let content = "```\ncode\n```\nText after";
470        let blocks = CodeBlockUtils::detect_code_blocks(content);
471        // 1 fenced block (fence markers no longer detected as inline spans)
472        assert_eq!(blocks.len(), 1);
473        assert_eq!(blocks[0].0, 0); // Fenced block starts at 0
474    }
475
476    #[test]
477    fn test_code_block_at_end() {
478        let content = "Text before\n```\ncode\n```";
479        let blocks = CodeBlockUtils::detect_code_blocks(content);
480        // 1 fenced block (fence markers no longer detected as inline spans)
481        assert_eq!(blocks.len(), 1);
482        // Check we have the fenced block
483        let fenced = blocks.iter().find(|(s, e)| content[*s..*e].contains("code"));
484        assert!(fenced.is_some());
485    }
486
487    #[test]
488    fn test_nested_fence_markers() {
489        // Code block containing fence markers as content
490        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
491        let blocks = CodeBlockUtils::detect_code_blocks(content);
492        // Should detect: outer block, inner ```, outer ````
493        assert!(!blocks.is_empty());
494        // Check we have the outer block
495        let outer = blocks.iter().find(|(s, e)| content[*s..*e].contains("nested"));
496        assert!(outer.is_some());
497    }
498
499    #[test]
500    fn test_indented_code_with_blank_lines() {
501        // Indented code blocks can contain blank lines
502        let content = "Text\n\n    line1\n\n    line2\n\nAfter";
503        let blocks = CodeBlockUtils::detect_code_blocks(content);
504        // May have multiple blocks due to blank line handling
505        assert!(!blocks.is_empty());
506        // Check that we captured the indented code
507        let all_content: String = blocks
508            .iter()
509            .map(|(s, e)| &content[*s..*e])
510            .collect::<Vec<_>>()
511            .join("");
512        assert!(all_content.contains("line1") || content[blocks[0].0..blocks[0].1].contains("line1"));
513    }
514
515    #[test]
516    fn test_code_span_with_spaces() {
517        // Code spans should NOT be detected as code blocks
518        let content = "Text ` code with spaces ` more";
519        let blocks = CodeBlockUtils::detect_code_blocks(content);
520        assert_eq!(blocks.len(), 0); // No blocks, only inline span
521    }
522
523    #[test]
524    fn test_fenced_block_with_info_string() {
525        // Fenced code blocks with complex info strings
526        let content = "```rust,no_run,should_panic\ncode\n```";
527        let blocks = CodeBlockUtils::detect_code_blocks(content);
528        // 1 fenced block (fence markers no longer detected as inline spans)
529        assert_eq!(blocks.len(), 1);
530        assert_eq!(blocks[0].0, 0);
531    }
532
533    #[test]
534    fn test_indented_fences_not_code_blocks() {
535        // Indented fence markers should still work as fences
536        let content = "Text\n  ```\n  code\n  ```\nAfter";
537        let blocks = CodeBlockUtils::detect_code_blocks(content);
538        // Only 1 fenced block (indented fences still work)
539        assert_eq!(blocks.len(), 1);
540    }
541
542    // Issue #175: Backticks in info string invalidate the fence
543    #[test]
544    fn test_backticks_in_info_string_not_code_block() {
545        // Per CommonMark spec: "If the info string comes after a backtick fence,
546        // it may not contain any backtick characters."
547        // So ```something``` is NOT a valid fence - the backticks are treated as inline code.
548        // Verified with: echo '```something```' | npx commonmark
549        // Output: <p><code>something</code></p>
550        let content = "```something```\n\n```bash\n# comment\n```";
551        let blocks = CodeBlockUtils::detect_code_blocks(content);
552        // Should find only the valid ```bash block, NOT the invalid ```something```
553        assert_eq!(blocks.len(), 1);
554        // The valid block should contain "# comment"
555        assert!(content[blocks[0].0..blocks[0].1].contains("# comment"));
556    }
557
558    #[test]
559    fn test_issue_175_reproduction() {
560        // Full reproduction of issue #175
561        let content = "```something```\n\n```bash\n# Have a parrot\necho \"🦜\"\n```";
562        let blocks = CodeBlockUtils::detect_code_blocks(content);
563        // Only the bash block is a code block
564        assert_eq!(blocks.len(), 1);
565        assert!(content[blocks[0].0..blocks[0].1].contains("Have a parrot"));
566    }
567
568    #[test]
569    fn test_tilde_fence_allows_tildes_in_info_string() {
570        // Tilde fences CAN have tildes in info string (only backtick restriction exists)
571        // ~~~abc~~~ opens an unclosed code block with info string "abc~~~"
572        let content = "~~~abc~~~\ncode content\n~~~";
573        let blocks = CodeBlockUtils::detect_code_blocks(content);
574        // This is a valid tilde fence that opens and closes
575        assert_eq!(blocks.len(), 1);
576    }
577
578    #[test]
579    fn test_nested_longer_fence_contains_shorter() {
580        // Longer fence (````) can contain shorter fence (```) as content
581        let content = "````\n```\nnested content\n```\n````";
582        let blocks = CodeBlockUtils::detect_code_blocks(content);
583        assert_eq!(blocks.len(), 1);
584        assert!(content[blocks[0].0..blocks[0].1].contains("nested content"));
585    }
586
587    #[test]
588    fn test_mixed_fence_types() {
589        // Tilde fence contains backtick markers as content
590        let content = "~~~\n```\nmixed content\n~~~";
591        let blocks = CodeBlockUtils::detect_code_blocks(content);
592        assert_eq!(blocks.len(), 1);
593        assert!(content[blocks[0].0..blocks[0].1].contains("mixed content"));
594    }
595
596    #[test]
597    fn test_indented_code_in_list_issue_276() {
598        // Issue #276: Indented code block inside a list should be detected by pulldown-cmark
599        let content = r#"1. First item
6002. Second item with code:
601
602        # This is a code block in a list
603        print("Hello, world!")
604
6054. Third item"#;
606
607        let blocks = CodeBlockUtils::detect_code_blocks(content);
608        // pulldown-cmark SHOULD detect this indented code block inside the list
609        assert!(!blocks.is_empty(), "Should detect indented code block inside list");
610
611        // Verify the detected block contains our code
612        let all_content: String = blocks
613            .iter()
614            .map(|(s, e)| &content[*s..*e])
615            .collect::<Vec<_>>()
616            .join("");
617        assert!(
618            all_content.contains("code block in a list") || all_content.contains("print"),
619            "Detected block should contain the code content: {all_content:?}"
620        );
621    }
622
623    #[test]
624    fn test_detect_markdown_code_blocks() {
625        let content = r#"# Example
626
627```markdown
628# Heading
629Content here
630```
631
632```md
633Another heading
634More content
635```
636
637```rust
638// Not markdown
639fn main() {}
640```
641"#;
642
643        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
644
645        // Should detect 2 blocks (markdown and md, not rust)
646        assert_eq!(
647            blocks.len(),
648            2,
649            "Should detect exactly 2 markdown blocks, got {blocks:?}"
650        );
651
652        // First block should be the ```markdown block
653        let first = &blocks[0];
654        let first_content = &content[first.content_start..first.content_end];
655        assert!(
656            first_content.contains("# Heading"),
657            "First block should contain '# Heading', got: {first_content:?}"
658        );
659
660        // Second block should be the ```md block
661        let second = &blocks[1];
662        let second_content = &content[second.content_start..second.content_end];
663        assert!(
664            second_content.contains("Another heading"),
665            "Second block should contain 'Another heading', got: {second_content:?}"
666        );
667    }
668
669    #[test]
670    fn test_detect_markdown_code_blocks_empty() {
671        let content = "# Just a heading\n\nNo code blocks here\n";
672        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
673        assert_eq!(blocks.len(), 0);
674    }
675
676    #[test]
677    fn test_detect_markdown_code_blocks_case_insensitive() {
678        let content = "```MARKDOWN\nContent\n```\n";
679        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
680        assert_eq!(blocks.len(), 1);
681    }
682
683    #[test]
684    fn test_detect_markdown_code_blocks_at_eof_no_trailing_newline() {
685        // Block at end of file without trailing newline after closing fence
686        let content = "# Doc\n\n```markdown\nContent\n```";
687        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
688        assert_eq!(blocks.len(), 1);
689        // Content should be extractable without panic
690        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
691        assert!(block_content.contains("Content"));
692    }
693
694    #[test]
695    fn test_detect_markdown_code_blocks_single_line_content() {
696        // Single line of content, no extra newlines
697        let content = "```markdown\nX\n```\n";
698        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
699        assert_eq!(blocks.len(), 1);
700        let block_content = &content[blocks[0].content_start..blocks[0].content_end];
701        assert_eq!(block_content, "X");
702    }
703
704    #[test]
705    fn test_detect_markdown_code_blocks_empty_content() {
706        // Block with no content between fences
707        let content = "```markdown\n```\n";
708        let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
709        // Should detect block but with empty range or not at all
710        // Either behavior is acceptable as long as no panic
711        if !blocks.is_empty() {
712            // If detected, content range should be valid
713            assert!(blocks[0].content_start <= blocks[0].content_end);
714        }
715    }
716
717    #[test]
718    fn test_detect_markdown_code_blocks_validates_ranges() {
719        // Ensure no panic on various edge cases
720        let test_cases = [
721            "",                             // Empty content
722            "```markdown",                  // Unclosed block
723            "```markdown\n",                // Unclosed block with newline
724            "```\n```",                     // Non-markdown block
725            "```markdown\n```",             // Empty markdown block
726            "   ```markdown\n   X\n   ```", // Indented block
727        ];
728
729        for content in test_cases {
730            // Should not panic
731            let blocks = CodeBlockUtils::detect_markdown_code_blocks(content);
732            // All detected blocks should have valid ranges
733            for block in &blocks {
734                assert!(
735                    block.content_start <= block.content_end,
736                    "Invalid range in content: {content:?}"
737                );
738                assert!(
739                    block.content_end <= content.len(),
740                    "Range exceeds content length in: {content:?}"
741                );
742            }
743        }
744    }
745
746    // ── is_in_code_block binary search tests ─────────────────────────────
747
748    #[test]
749    fn test_is_in_code_block_empty_blocks() {
750        assert!(!CodeBlockUtils::is_in_code_block(&[], 0));
751        assert!(!CodeBlockUtils::is_in_code_block(&[], 100));
752        assert!(!CodeBlockUtils::is_in_code_block(&[], usize::MAX));
753    }
754
755    #[test]
756    fn test_is_in_code_block_single_range() {
757        let blocks = [(10, 20)];
758        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 0));
759        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 9));
760        assert!(CodeBlockUtils::is_in_code_block(&blocks, 10));
761        assert!(CodeBlockUtils::is_in_code_block(&blocks, 15));
762        assert!(CodeBlockUtils::is_in_code_block(&blocks, 19));
763        // end is exclusive
764        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 20));
765        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 21));
766    }
767
768    #[test]
769    fn test_is_in_code_block_multiple_ranges() {
770        let blocks = [(5, 10), (20, 30), (50, 60)];
771        // Before all
772        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 0));
773        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 4));
774        // In first
775        assert!(CodeBlockUtils::is_in_code_block(&blocks, 5));
776        assert!(CodeBlockUtils::is_in_code_block(&blocks, 9));
777        // Gap between first and second
778        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 10));
779        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 15));
780        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 19));
781        // In second
782        assert!(CodeBlockUtils::is_in_code_block(&blocks, 20));
783        assert!(CodeBlockUtils::is_in_code_block(&blocks, 29));
784        // Gap between second and third
785        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 30));
786        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 49));
787        // In third
788        assert!(CodeBlockUtils::is_in_code_block(&blocks, 50));
789        assert!(CodeBlockUtils::is_in_code_block(&blocks, 59));
790        // After all
791        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 60));
792        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 1000));
793    }
794
795    #[test]
796    fn test_is_in_code_block_adjacent_ranges() {
797        // Ranges that are exactly adjacent (end of one == start of next)
798        let blocks = [(0, 10), (10, 20), (20, 30)];
799        assert!(CodeBlockUtils::is_in_code_block(&blocks, 0));
800        assert!(CodeBlockUtils::is_in_code_block(&blocks, 9));
801        assert!(CodeBlockUtils::is_in_code_block(&blocks, 10));
802        assert!(CodeBlockUtils::is_in_code_block(&blocks, 19));
803        assert!(CodeBlockUtils::is_in_code_block(&blocks, 20));
804        assert!(CodeBlockUtils::is_in_code_block(&blocks, 29));
805        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 30));
806    }
807
808    #[test]
809    fn test_is_in_code_block_single_byte_range() {
810        let blocks = [(5, 6)];
811        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 4));
812        assert!(CodeBlockUtils::is_in_code_block(&blocks, 5));
813        assert!(!CodeBlockUtils::is_in_code_block(&blocks, 6));
814    }
815
816    #[test]
817    fn test_is_in_code_block_matches_linear_scan() {
818        // Verify binary search produces identical results to linear scan
819        // for a realistic document layout
820        let content = "# Heading\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nSome text\n\n```\nmore code\n```\n\nEnd\n";
821        let blocks = CodeBlockUtils::detect_code_blocks(content);
822
823        for pos in 0..content.len() {
824            let binary = CodeBlockUtils::is_in_code_block(&blocks, pos);
825            let linear = blocks.iter().any(|&(s, e)| pos >= s && pos < e);
826            assert_eq!(
827                binary, linear,
828                "Mismatch at pos {pos}: binary={binary}, linear={linear}, blocks={blocks:?}"
829            );
830        }
831    }
832
833    #[test]
834    fn test_is_in_code_block_at_range_boundaries() {
835        // Exhaustive boundary testing for every block start/end
836        let blocks = [(100, 200), (300, 400), (500, 600)];
837        for &(start, end) in &blocks {
838            assert!(
839                !CodeBlockUtils::is_in_code_block(&blocks, start - 1),
840                "pos={} should be outside",
841                start - 1
842            );
843            assert!(
844                CodeBlockUtils::is_in_code_block(&blocks, start),
845                "pos={start} should be inside"
846            );
847            assert!(
848                CodeBlockUtils::is_in_code_block(&blocks, end - 1),
849                "pos={} should be inside",
850                end - 1
851            );
852            assert!(
853                !CodeBlockUtils::is_in_code_block(&blocks, end),
854                "pos={end} should be outside"
855            );
856        }
857    }
858}