rumdl_lib/utils/
code_block_utils.rs

1//!
2//! Utility functions for detecting and handling code blocks and code spans in Markdown for rumdl.
3
4use crate::rules::blockquote_utils::BlockquoteUtils;
5
/// How a code block relates to the list content around it.
///
/// Produced by [`CodeBlockUtils::analyze_code_block_context`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum CodeBlockContext {
    /// The block is not indented enough to belong to a list item and has at
    /// least one blank line next to it, so it separates the surrounding lists.
    Standalone,
    /// The block is indented far enough to be a continuation of a list item.
    Indented,
    /// The block touches list content without blank-line separation; this
    /// edge case is treated as not breaking the list.
    Adjacent,
}
16
/// Namespace type for Markdown code-block helpers.
///
/// It carries no state; every helper is an associated function.
pub struct CodeBlockUtils;
19
20impl CodeBlockUtils {
21    /// Detect all code blocks in the content (NOT including inline code spans)
22    pub fn detect_code_blocks(content: &str) -> Vec<(usize, usize)> {
23        let mut blocks = Vec::new();
24        let mut in_code_block = false;
25        let mut code_block_start = 0;
26        let mut opening_fence_char = ' ';
27        let mut opening_fence_len = 0;
28
29        // Pre-compute line positions for efficient offset calculation
30        let lines: Vec<&str> = content.lines().collect();
31        let mut line_positions = Vec::with_capacity(lines.len());
32        let mut pos = 0;
33        for line in &lines {
34            line_positions.push(pos);
35            pos += line.len() + 1; // +1 for newline
36        }
37
38        // Pre-compute list context for each line to properly handle fence indentation
39        // Inside list items, fences can have more absolute indentation (relative indent still <= 3)
40        let mut list_context_indent: Vec<usize> = vec![0; lines.len()];
41        {
42            let mut in_list = false;
43            let mut continuation_indent: usize = 0;
44
45            for (i, line) in lines.iter().enumerate() {
46                let mut line_no_bq = line.to_string();
47                while BlockquoteUtils::is_blockquote(&line_no_bq) {
48                    line_no_bq = BlockquoteUtils::extract_content(&line_no_bq);
49                }
50
51                let indent_level = line_no_bq.len() - line_no_bq.trim_start().len();
52                let trimmed = line_no_bq.trim_start();
53
54                // Check if this is a list item
55                let is_ordered = {
56                    let first_char = trimmed.chars().next();
57                    first_char.is_some_and(|c| c.is_numeric())
58                        && trimmed.chars().position(|c| c == '.' || c == ')').is_some_and(|pos| {
59                            pos > 0
60                                && trimmed[..pos].chars().all(|c| c.is_numeric())
61                                && trimmed.chars().nth(pos + 1).is_some_and(|c| c == ' ' || c == '\t')
62                        })
63                };
64                let is_list_item =
65                    trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") || is_ordered;
66
67                if is_list_item {
68                    in_list = true;
69                    let marker_width =
70                        if trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") {
71                            1
72                        } else {
73                            trimmed.chars().take_while(|c| c.is_numeric()).count() + 1
74                        };
75                    let after_marker = &trimmed[marker_width..];
76                    let spaces_after = after_marker.chars().take_while(|c| *c == ' ' || *c == '\t').count();
77                    continuation_indent = indent_level + marker_width + spaces_after;
78                } else if in_list && !line_no_bq.trim().is_empty() && indent_level < continuation_indent {
79                    in_list = false;
80                    continuation_indent = 0;
81                }
82
83                list_context_indent[i] = if in_list { continuation_indent } else { 0 };
84            }
85        }
86
87        // Find fenced code blocks
88        for (i, line) in lines.iter().enumerate() {
89            let line_start = line_positions[i];
90
91            // Strip ALL blockquote prefixes to properly detect fenced code blocks inside blockquotes
92            // This handles nested blockquotes by recursively stripping '>' markers
93            let mut line_without_blockquote = line.to_string();
94            while BlockquoteUtils::is_blockquote(&line_without_blockquote) {
95                line_without_blockquote = BlockquoteUtils::extract_content(&line_without_blockquote);
96            }
97
98            // Calculate indentation before trimming
99            let indent = line_without_blockquote.len() - line_without_blockquote.trim_start().len();
100            let trimmed = line_without_blockquote.trim_start();
101
102            // Check if this line could be a code fence
103            // CommonMark: fences must have at most 3 spaces of indentation RELATIVE to container
104            // Inside list items, the container is the list content, not the document edge
105            let effective_indent = indent.saturating_sub(list_context_indent[i]);
106            if effective_indent <= 3 && (trimmed.starts_with("```") || trimmed.starts_with("~~~")) {
107                let fence_char = trimmed.chars().next().unwrap();
108                let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
109
110                if !in_code_block && fence_len >= 3 {
111                    // Opening fence
112                    code_block_start = line_start;
113                    in_code_block = true;
114                    opening_fence_char = fence_char;
115                    opening_fence_len = fence_len;
116                } else if in_code_block && fence_char == opening_fence_char && fence_len >= opening_fence_len {
117                    // Closing fence - must match opening fence character and be at least as long
118                    let code_block_end = line_start + line.len();
119                    blocks.push((code_block_start, code_block_end));
120                    in_code_block = false;
121                    opening_fence_char = ' ';
122                    opening_fence_len = 0;
123                }
124                // If we're in a code block but the fence doesn't match, it's just content
125            }
126        }
127
128        // Handle unclosed code blocks
129        if in_code_block {
130            blocks.push((code_block_start, content.len()));
131        }
132
133        // Find indented code blocks (4+ spaces or tab at start of line)
134        // According to CommonMark, indented code blocks must be preceded by a blank line
135        // (unless they're at the start of the document or after a block-level element)
136        //
137        // IMPORTANT: We must handle list contexts correctly:
138        // - At document level: 4 spaces + blank line before = code block
139        // - In a list context: 4 spaces = continuation paragraph (NOT a code block)
140        // - In a list context: 8+ spaces (depending on list marker) = code block
141        let mut in_indented_block = false;
142        let mut indented_block_start = 0;
143        let mut in_list_context = false;
144        let mut list_continuation_indent: usize = 0;
145
146        for (line_idx, line) in lines.iter().enumerate() {
147            let line_start = if line_idx < line_positions.len() {
148                line_positions[line_idx]
149            } else {
150                0
151            };
152
153            // Strip ALL blockquote prefixes to properly detect indented code blocks inside blockquotes
154            let mut line_without_blockquote = line.to_string();
155            while BlockquoteUtils::is_blockquote(&line_without_blockquote) {
156                line_without_blockquote = BlockquoteUtils::extract_content(&line_without_blockquote);
157            }
158
159            // Calculate the indent level
160            let indent_level = line_without_blockquote.len() - line_without_blockquote.trim_start().len();
161            let is_indented = line_without_blockquote.starts_with("    ") || line_without_blockquote.starts_with("\t");
162
163            // Check if this looks like a list item (has list marker after indentation)
164            let trimmed = line_without_blockquote.trim_start();
165
166            // Check for ordered list marker: 1-9 digits followed by . or )
167            // Must be followed by at least one space
168            let is_ordered_list = {
169                let mut chars = trimmed.chars();
170                let first_char = chars.next();
171                if !first_char.is_some_and(|c| c.is_numeric()) {
172                    false
173                } else {
174                    // Find delimiter position (. or ))
175                    let delimiter_char_pos = trimmed.chars().position(|c| c == '.' || c == ')');
176                    match delimiter_char_pos {
177                        Some(char_pos) if char_pos > 0 => {
178                            // Convert character position to byte position for slicing
179                            let byte_pos = trimmed.char_indices().nth(char_pos).map(|(i, _)| i);
180                            if let Some(byte_pos) = byte_pos {
181                                // All chars before delimiter must be digits
182                                let all_digits = trimmed[..byte_pos].chars().all(|c| c.is_numeric());
183                                // Must be followed by space or tab
184                                let has_space =
185                                    trimmed.chars().nth(char_pos + 1).is_some_and(|c| c == ' ' || c == '\t');
186                                all_digits && has_space
187                            } else {
188                                false
189                            }
190                        }
191                        _ => false,
192                    }
193                }
194            };
195
196            let is_list_item =
197                trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") || is_ordered_list;
198
199            // Check if previous line was blank (after stripping blockquote markers)
200            let prev_line_without_blockquote = if line_idx > 0 {
201                let mut prev = lines[line_idx - 1].to_string();
202                while BlockquoteUtils::is_blockquote(&prev) {
203                    prev = BlockquoteUtils::extract_content(&prev);
204                }
205                prev
206            } else {
207                String::new()
208            };
209            let prev_blank = line_idx > 0 && prev_line_without_blockquote.trim().is_empty();
210
211            // Update list context tracking
212            if is_list_item {
213                // We're starting or continuing a list
214                in_list_context = true;
215
216                // Calculate continuation indent per CommonMark spec:
217                // "The spaces of indentation after the list marker determine how much
218                // relative indentation is needed. The first continuation block must be
219                // indented to the column of the first character other than a space after the marker."
220
221                let marker_column = indent_level;
222                let marker_width =
223                    if trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") {
224                        1 // Single character marker (-, *, +)
225                    } else {
226                        // Ordered list marker: count digits + delimiter (. or ))
227                        trimmed.chars().take_while(|c| c.is_numeric()).count() + 1
228                    };
229
230                // Count actual spaces/tabs after marker (CommonMark allows 1-4 spaces)
231                // Find the first non-space character after the marker
232                let after_marker = &trimmed[marker_width..];
233                let spaces_after_marker = after_marker.chars().take_while(|c| *c == ' ' || *c == '\t').count();
234
235                // Continuation indent = marker column + marker width + actual spaces
236                // This is the column where the first content character appears
237                list_continuation_indent = marker_column + marker_width + spaces_after_marker;
238            } else if in_list_context
239                && !line_without_blockquote.trim().is_empty()
240                && indent_level < list_continuation_indent
241            {
242                // Outdented non-empty line ends the list context
243                in_list_context = false;
244                list_continuation_indent = 0;
245            }
246
247            // Determine if this indented line is:
248            // 1. A list continuation paragraph (indent >= continuation_indent, < continuation_indent + 4)
249            // 2. A code block within a list (indent >= continuation_indent + 4)
250            // 3. A document-level code block (not in list context)
251
252            let is_list_continuation_paragraph = in_list_context
253                && indent_level >= list_continuation_indent
254                && indent_level < (list_continuation_indent + 4);
255
256            let is_code_block_in_list = in_list_context && indent_level >= (list_continuation_indent + 4);
257
258            // Handle indented code blocks
259            if is_indented && !line_without_blockquote.trim().is_empty() && !is_list_item {
260                if is_code_block_in_list {
261                    // Code block within list (CommonMark Example 270, 273, 274)
262                    // Requires continuation_indent + 4 spaces, and must have blank line before
263                    if !in_indented_block && prev_blank {
264                        in_indented_block = true;
265                        indented_block_start = line_start;
266                    }
267                } else if !is_list_continuation_paragraph {
268                    // Document-level indented code block (not in list)
269                    if !in_indented_block && prev_blank {
270                        in_indented_block = true;
271                        indented_block_start = line_start;
272                    }
273                }
274                // If is_list_continuation_paragraph, don't treat as code block
275            } else if in_indented_block {
276                // End of indented code block
277                let block_end = if line_idx > 0 && line_idx - 1 < line_positions.len() {
278                    line_positions[line_idx - 1] + lines[line_idx - 1].len()
279                } else {
280                    line_start
281                };
282                blocks.push((indented_block_start, block_end));
283                in_indented_block = false;
284            }
285        }
286
287        // Handle indented block that goes to end of file
288        if in_indented_block {
289            blocks.push((indented_block_start, content.len()));
290        }
291
292        // Note: We DO NOT include inline code spans here - they are not code blocks!
293        // Inline code spans are handled separately by the code span parser.
294
295        blocks.sort_by(|a, b| a.0.cmp(&b.0));
296        blocks
297    }
298
299    /// Check if a position is within a code block (for compatibility)
300    pub fn is_in_code_block_or_span(blocks: &[(usize, usize)], pos: usize) -> bool {
301        // This is a compatibility function - it only checks code blocks now, not spans
302        blocks.iter().any(|&(start, end)| pos >= start && pos < end)
303    }
304
305    /// Check if a position is within a code block (NOT including inline code spans)
306    pub fn is_in_code_block(blocks: &[(usize, usize)], pos: usize) -> bool {
307        blocks.iter().any(|&(start, end)| pos >= start && pos < end)
308    }
309
310    /// Analyze code block context relative to list parsing
311    /// This is the core function implementing Design #3's three-tier classification
312    pub fn analyze_code_block_context(
313        lines: &[crate::lint_context::LineInfo],
314        line_idx: usize,
315        min_continuation_indent: usize,
316    ) -> CodeBlockContext {
317        if let Some(line_info) = lines.get(line_idx) {
318            // Rule 1: Indentation Analysis - Is it sufficiently indented for list continuation?
319            if line_info.indent >= min_continuation_indent {
320                return CodeBlockContext::Indented;
321            }
322
323            // Rule 2: Blank Line Context - Check for structural separation indicators
324            let (prev_blanks, next_blanks) = Self::count_surrounding_blank_lines(lines, line_idx);
325
326            // Rule 3: Standalone Detection - Insufficient indentation + blank line separation
327            // This is the key fix: root-level code blocks with blank lines separate lists
328            if prev_blanks > 0 || next_blanks > 0 {
329                return CodeBlockContext::Standalone;
330            }
331
332            // Rule 4: Default - Adjacent (conservative, non-breaking for edge cases)
333            CodeBlockContext::Adjacent
334        } else {
335            // Fallback for invalid line index
336            CodeBlockContext::Adjacent
337        }
338    }
339
340    /// Count blank lines before and after the given line index
341    fn count_surrounding_blank_lines(lines: &[crate::lint_context::LineInfo], line_idx: usize) -> (usize, usize) {
342        let mut prev_blanks = 0;
343        let mut next_blanks = 0;
344
345        // Count blank lines before (look backwards)
346        for i in (0..line_idx).rev() {
347            if let Some(line) = lines.get(i) {
348                if line.is_blank {
349                    prev_blanks += 1;
350                } else {
351                    break;
352                }
353            } else {
354                break;
355            }
356        }
357
358        // Count blank lines after (look forwards)
359        for i in (line_idx + 1)..lines.len() {
360            if let Some(line) = lines.get(i) {
361                if line.is_blank {
362                    next_blanks += 1;
363                } else {
364                    break;
365                }
366            } else {
367                break;
368            }
369        }
370
371        (prev_blanks, next_blanks)
372    }
373
374    /// Calculate minimum indentation required for code block to continue a list
375    /// Based on the most recent list item's marker width
376    pub fn calculate_min_continuation_indent(
377        content: &str,
378        lines: &[crate::lint_context::LineInfo],
379        current_line_idx: usize,
380    ) -> usize {
381        // Look backwards to find the most recent list item
382        for i in (0..current_line_idx).rev() {
383            if let Some(line_info) = lines.get(i) {
384                if let Some(list_item) = &line_info.list_item {
385                    // Calculate minimum continuation indent for this list item
386                    return if list_item.is_ordered {
387                        list_item.marker_column + list_item.marker.len() + 1 // +1 for space after marker
388                    } else {
389                        list_item.marker_column + 2 // Unordered lists need marker + space (min 2)
390                    };
391                }
392
393                // Stop at structural separators that would break list context
394                if line_info.heading.is_some() || Self::is_structural_separator(line_info.content(content)) {
395                    break;
396                }
397            }
398        }
399
400        0 // No list context found
401    }
402
403    /// Check if content is a structural separator (headings, horizontal rules, etc.)
404    fn is_structural_separator(content: &str) -> bool {
405        let trimmed = content.trim();
406        trimmed.starts_with("---")
407            || trimmed.starts_with("***")
408            || trimmed.starts_with("___")
409            || trimmed.contains('|') // Tables
410            || trimmed.starts_with(">") // Blockquotes
411    }
412}
413
#[cfg(test)]
mod tests {
    //! Unit tests for [`CodeBlockUtils`]. `detect_code_blocks` reports fenced and
    //! indented code blocks only; inline code spans must never show up in its output.

    use super::*;

    /// Text covered by a detected block.
    fn block_text<'a>(content: &'a str, block: &(usize, usize)) -> &'a str {
        &content[block.0..block.1]
    }

    #[test]
    fn test_detect_fenced_code_blocks() {
        // Basic fenced code block with backticks: the range spans both fences.
        let content = "Some text\n```\ncode here\n```\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(block_text(content, &blocks[0]), "```\ncode here\n```");

        // Fenced code block with tildes
        let content = "Some text\n~~~\ncode here\n~~~\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(block_text(content, &blocks[0]), "~~~\ncode here\n~~~");

        // Multiple code blocks, one per fence style
        let content = "Text\n```\ncode1\n```\nMiddle\n~~~\ncode2\n~~~\nEnd";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 2);
    }

    #[test]
    fn test_detect_code_blocks_with_language() {
        // An info string after the opening fence does not prevent detection
        let content = "Text\n```rust\nfn main() {}\n```\nMore";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(block_text(content, &blocks[0]).contains("fn main"));
    }

    #[test]
    fn test_unclosed_code_block() {
        // Unclosed code block should extend to end of content
        let content = "Text\n```\ncode here\nno closing fence";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].1, content.len());
    }

    #[test]
    fn test_indented_code_blocks() {
        // Basic indented code block
        let content = "Paragraph\n\n    code line 1\n    code line 2\n\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(block_text(content, &blocks[0]).contains("code line 1"));
        assert!(block_text(content, &blocks[0]).contains("code line 2"));

        // Indented code with tabs
        let content = "Paragraph\n\n\tcode with tab\n\tanother line\n\nText";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_indented_code_requires_blank_line() {
        // Indented lines without preceding blank line are not code blocks
        let content = "Paragraph\n    indented but not code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // With blank line, it becomes a code block
        let content = "Paragraph\n\n    now it's code\nMore text";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }

    #[test]
    fn test_list_items_not_code_blocks() {
        // Indented bullet items should not be detected as code blocks
        let content = "List:\n\n    - Item 1\n    - Item 2\n    * Item 3\n    + Item 4";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // Numbered lists, with both `.` and `)` delimiters
        let content = "List:\n\n    1. First\n    2. Second\n    1) Also first";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_inline_code_spans_not_detected() {
        // Inline code spans should NOT be detected as code blocks
        let content = "Text with `inline code` here";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // Multiple backtick code span
        let content = "Text with ``code with ` backtick`` here";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // Multiple code spans
        let content = "Has `code1` and `code2` spans";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_unclosed_code_span() {
        // Unclosed code span should not be detected
        let content = "Text with `unclosed code span";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);

        // Mismatched backticks
        let content = "Text with ``one style` different close";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_mixed_code_blocks_and_spans() {
        let content = "Has `span1` text\n```\nblock\n```\nand `span2`";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        // Only the fenced block is reported, never the inline spans
        assert_eq!(blocks.len(), 1);
        assert!(block_text(content, &blocks[0]).contains("block"));
        assert!(!blocks.iter().any(|b| block_text(content, b) == "`span1`"));
        assert!(!blocks.iter().any(|b| block_text(content, b) == "`span2`"));
    }

    #[test]
    fn test_is_in_code_block_or_span() {
        let blocks = vec![(10, 20), (30, 40), (50, 60)];

        // Test positions inside blocks
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 15));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 35));
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 55));

        // Test positions at boundaries
        assert!(CodeBlockUtils::is_in_code_block_or_span(&blocks, 10)); // Start is inclusive
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 20)); // End is exclusive

        // Test positions outside blocks
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 5));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 25));
        assert!(!CodeBlockUtils::is_in_code_block_or_span(&blocks, 65));
    }

    #[test]
    fn test_empty_content() {
        let blocks = CodeBlockUtils::detect_code_blocks("");
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_code_block_at_start() {
        let content = "```\ncode\n```\nText after";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0); // Fenced block starts at 0
    }

    #[test]
    fn test_code_block_at_end() {
        let content = "Text before\n```\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert!(block_text(content, &blocks[0]).contains("code"));
    }

    #[test]
    fn test_nested_fence_markers() {
        // A shorter fence inside a longer one is content, not a closing fence,
        // so exactly one block spans from the opening to the closing ````.
        let content = "Text\n````\n```\nnested\n```\n````\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(block_text(content, &blocks[0]), "````\n```\nnested\n```\n````");
    }

    #[test]
    fn test_indented_code_with_blank_lines() {
        // Indented code separated by a blank line may be reported as one block
        // or as several; either way every code line must be covered.
        let content = "Text\n\n    line1\n\n    line2\n\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert!(!blocks.is_empty());
        let covered: String = blocks
            .iter()
            .map(|b| block_text(content, b))
            .collect::<Vec<_>>()
            .join("\n");
        assert!(covered.contains("line1"));
        assert!(covered.contains("line2"));
        assert!(!covered.contains("After"));
    }

    #[test]
    fn test_code_span_with_spaces() {
        // Code spans should NOT be detected as code blocks
        let content = "Text ` code with spaces ` more";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 0);
    }

    #[test]
    fn test_fenced_block_with_info_string() {
        // Fenced code blocks with complex info strings
        let content = "```rust,no_run,should_panic\ncode\n```";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].0, 0);
    }

    #[test]
    fn test_indented_fences_not_code_blocks() {
        // Fences indented by up to three spaces still open and close a fenced
        // block; they are not treated as indented code.
        let content = "Text\n  ```\n  code\n  ```\nAfter";
        let blocks = CodeBlockUtils::detect_code_blocks(content);
        assert_eq!(blocks.len(), 1);
    }
}
652        assert_eq!(blocks.len(), 1);
653    }
654}