rumdl_lib/rules/
code_block_utils.rs

1use crate::utils::element_cache::ElementCache;
2use crate::utils::range_utils::LineIndex;
3use regex::Regex;
4use std::fmt;
5use std::sync::LazyLock;
6
7// Standard code block detection patterns
8static FENCED_CODE_BLOCK_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap());
9static FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```\s*$").unwrap());
10static ALTERNATE_FENCED_CODE_BLOCK_START: LazyLock<Regex> =
11    LazyLock::new(|| Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap());
12static ALTERNATE_FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)~~~\s*$").unwrap());
13static LIST_ITEM_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)([*+-]|\d+[.)])(\s*)(.*)$").unwrap());
14
15/// Utility functions for detecting and handling code blocks in Markdown documents
16pub struct CodeBlockUtils;
17
18impl CodeBlockUtils {
19    /// Check if a line is inside a code block
20    pub fn is_in_code_block(content: &str, line_num: usize) -> bool {
21        let lines: Vec<&str> = content.lines().collect();
22        if line_num >= lines.len() {
23            return false;
24        }
25
26        let mut in_fenced_code = false;
27        let mut in_alternate_fenced = false;
28
29        for (i, line) in lines.iter().enumerate() {
30            if i > line_num {
31                break;
32            }
33
34            if FENCED_CODE_BLOCK_START.is_match(line) {
35                in_fenced_code = !in_fenced_code;
36            } else if FENCED_CODE_BLOCK_END.is_match(line) && in_fenced_code {
37                in_fenced_code = false;
38            } else if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
39                in_alternate_fenced = !in_alternate_fenced;
40            } else if ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) && in_alternate_fenced {
41                in_alternate_fenced = false;
42            }
43        }
44
45        // Check if the current line is indented as code block
46        if line_num < lines.len() && Self::is_indented_code_block(lines[line_num]) {
47            return true;
48        }
49
50        // Return true if we're in any type of code block
51        in_fenced_code || in_alternate_fenced
52    }
53
54    /// Check if a line is a code block delimiter (start or end)
55    pub fn is_code_block_delimiter(line: &str) -> bool {
56        FENCED_CODE_BLOCK_START.is_match(line)
57            || FENCED_CODE_BLOCK_END.is_match(line)
58            || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
59            || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
60    }
61
62    /// Check if a line is the start of a code block
63    pub fn is_code_block_start(line: &str) -> bool {
64        FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
65    }
66
67    /// Check if a line is the end of a code block
68    pub fn is_code_block_end(line: &str) -> bool {
69        FENCED_CODE_BLOCK_END.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
70    }
71
72    /// Check if a line is an indented code block (4+ columns of leading whitespace)
73    pub fn is_indented_code_block(line: &str) -> bool {
74        // Use proper tab expansion to calculate effective indentation
75        ElementCache::calculate_indentation_width_default(line) >= 4
76    }
77
78    /// Extracts the language specifier from a fenced code block start line
79    ///
80    /// This function parses the line that starts a fenced code block (using either ``` or ~~~)
81    /// and extracts the language specifier that follows the fence markers.
82    ///
83    /// # Parameters
84    /// * `line` - The line of text that potentially contains a code block start with language specifier
85    ///
86    /// # Returns
87    /// * `Some(String)` - The language specifier if found
88    /// * `None` - If the line is not a code block start or has no language specifier
89    ///
90    /// # Examples
91    /// ```
92    /// use rumdl_lib::rules::code_block_utils::CodeBlockUtils;
93    ///
94    /// let specifier = CodeBlockUtils::get_language_specifier("```rust");
95    /// assert_eq!(specifier, Some("rust".to_string()));
96    ///
97    /// let specifier = CodeBlockUtils::get_language_specifier("~~~python");
98    /// assert_eq!(specifier, Some("python".to_string()));
99    ///
100    /// let specifier = CodeBlockUtils::get_language_specifier("```");
101    /// assert_eq!(specifier, None);
102    /// ```
103    pub fn get_language_specifier(line: &str) -> Option<String> {
104        if FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
105            let trimmed = line.trim_start();
106            let after_fence = &trimmed[3..].trim_start();
107            if !after_fence.is_empty() {
108                return Some(after_fence.to_string());
109            }
110        }
111        None
112    }
113
114    /// Identify which lines in the content are in code blocks
115    ///
116    /// This function analyzes Markdown content and determines which lines are part of code blocks,
117    /// including both fenced code blocks (``` or ~~~) and indented code blocks.
118    ///
119    /// # Algorithm
120    /// - Iterates through each line of content
121    /// - Tracks state for fenced code blocks (toggled by fence delimiters)
122    /// - Detects indented code blocks (4 spaces or 1 tab)
123    /// - Handles nested code blocks appropriately
124    ///
125    /// # Parameters
126    /// * `content` - The full Markdown content to analyze
127    ///
128    /// # Returns
129    /// A vector of boolean values with the same length as the number of lines in the input content.
130    /// Each element indicates whether the corresponding line is inside a code block:
131    /// * `true` - The line is inside a code block
132    /// * `false` - The line is not inside a code block
133    ///
134    /// # Examples
135    /// ```
136    /// use rumdl_lib::rules::code_block_utils::CodeBlockUtils;
137    ///
138    /// let content = "Some text\n```rust\nlet x = 1;\n```\nMore text";
139    /// let in_code_block = CodeBlockUtils::identify_code_block_lines(content);
140    /// assert_eq!(in_code_block, vec![false, true, true, true, false]);
141    /// ```
142    pub fn identify_code_block_lines(content: &str) -> Vec<bool> {
143        let lines: Vec<&str> = content.lines().collect();
144        let mut in_code_block = vec![false; lines.len()];
145
146        let mut in_fenced_code = false;
147        let mut in_alternate_fenced = false;
148
149        for (i, line) in lines.iter().enumerate() {
150            // Quick check for code fence markers with literal prefixes
151            let trimmed = line.trim_start();
152
153            if trimmed.starts_with("```") {
154                if FENCED_CODE_BLOCK_START.is_match(line) {
155                    in_fenced_code = !in_fenced_code;
156                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
157                } else if in_fenced_code && FENCED_CODE_BLOCK_END.is_match(line) {
158                    in_fenced_code = false;
159                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
160                }
161            } else if trimmed.starts_with("~~~") {
162                if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
163                    in_alternate_fenced = !in_alternate_fenced;
164                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
165                } else if in_alternate_fenced && ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) {
166                    in_alternate_fenced = false;
167                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
168                }
169            }
170
171            // If we're in a code fence, mark the line
172            if in_fenced_code || in_alternate_fenced {
173                in_code_block[i] = true;
174            } else if !in_code_block[i] {
175                // Check for indented code blocks only if not already marked
176                // Do not mark as code block if the line is a list item
177                if ElementCache::calculate_indentation_width_default(line) >= 4 && !LIST_ITEM_RE.is_match(line) {
178                    in_code_block[i] = true;
179                }
180            }
181        }
182
183        in_code_block
184    }
185}
186
187// Cached regex patterns for better performance
188static FENCED_CODE_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(?:```|~~~)").unwrap());
189
/// Per-line classification: whether a line belongs to a code block and, if so, which type.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CodeBlockState {
    /// The line is not part of any code block.
    None,
    /// The line belongs to a fenced code block.
    Fenced,
    /// The line belongs to an indented code block.
    Indented,
}
197
198/// Structure to hold pre-computed code block information
199#[derive(Debug)]
200pub struct CodeBlockInfo<'a> {
201    /// Whether each line is in a code block, and which type
202    pub block_states: Vec<CodeBlockState>,
203    /// Positions of code spans in the text (start, end)
204    pub code_spans: Vec<(usize, usize)>,
205    /// The original content used to create this info
206    content: &'a str,
207    /// LineIndex for correct byte position calculations across all line ending types
208    line_index: LineIndex<'a>,
209}
210
211impl<'a> CodeBlockInfo<'a> {
212    /// Create a new CodeBlockInfo by analyzing the content
213    pub fn new(content: &'a str) -> Self {
214        let block_states = compute_code_blocks(content);
215        let code_spans = compute_code_spans(content);
216        let line_index = LineIndex::new(content);
217
218        CodeBlockInfo {
219            block_states,
220            code_spans,
221            content,
222            line_index,
223        }
224    }
225
226    /// Check if a line is inside a code block
227    pub fn is_in_code_block(&self, line_index: usize) -> bool {
228        if line_index < self.block_states.len() {
229            self.block_states[line_index] != CodeBlockState::None
230        } else {
231            false
232        }
233    }
234
235    /// Check if a position is inside a code span
236    pub fn is_in_code_span(&self, line_index: usize, column_index: usize) -> bool {
237        // Calculate absolute position using LineIndex for correct handling of all line ending types
238        let line_start = self
239            .line_index
240            .get_line_start_byte(line_index + 1)
241            .unwrap_or(self.content.len());
242        let position = line_start + column_index;
243
244        // Check if position is in any code span
245        for &(start, end) in &self.code_spans {
246            if position >= start && position <= end {
247                return true;
248            }
249        }
250
251        false
252    }
253
254    /// Quick check if content contains any code blocks
255    pub fn has_code_blocks(&self) -> bool {
256        self.block_states.iter().any(|state| *state != CodeBlockState::None)
257    }
258
259    /// Quick check if content contains any code spans
260    pub fn has_code_spans(&self) -> bool {
261        !self.code_spans.is_empty()
262    }
263}
264
265/// Compute which lines are in code blocks and what type
266pub fn compute_code_blocks(content: &str) -> Vec<CodeBlockState> {
267    let mut in_fenced_block = false;
268    let mut result = Vec::new();
269    let mut fence_marker = "";
270
271    for line in content.lines() {
272        if in_fenced_block {
273            if line.trim().starts_with(fence_marker) {
274                in_fenced_block = false;
275                result.push(CodeBlockState::Fenced); // The closing fence is still part of the block
276            } else {
277                result.push(CodeBlockState::Fenced);
278            }
279        } else if FENCED_CODE_BLOCK_PATTERN.is_match(line) {
280            in_fenced_block = true;
281            fence_marker = if line.trim().starts_with("```") { "```" } else { "~~~" };
282            result.push(CodeBlockState::Fenced); // The opening fence is part of the block
283        } else if !line.trim().is_empty() {
284            // Use proper tab expansion to check for indented code block
285            if ElementCache::calculate_indentation_width_default(line) >= 4 {
286                result.push(CodeBlockState::Indented);
287            } else {
288                result.push(CodeBlockState::None);
289            }
290        } else {
291            result.push(CodeBlockState::None);
292        }
293    }
294
295    result
296}
297
298/// Compute positions of code spans in the text
299pub fn compute_code_spans(content: &str) -> Vec<(usize, usize)> {
300    let mut spans = Vec::new();
301
302    // Simplify by using a safer character-based approach
303    let chars: Vec<char> = content.chars().collect();
304    let mut i = 0;
305
306    while i < chars.len() {
307        // Skip escaped backticks
308        if i > 0 && chars[i] == '`' && chars[i - 1] == '\\' {
309            i += 1;
310            continue;
311        }
312
313        // Look for backtick sequences
314        if chars[i] == '`' {
315            let mut backtick_count = 1;
316            let start_idx = i;
317
318            // Count consecutive backticks
319            i += 1;
320            while i < chars.len() && chars[i] == '`' {
321                backtick_count += 1;
322                i += 1;
323            }
324
325            // Skip this if it looks like a code block delimiter
326            // This prevents confusion between code spans and code blocks
327            if is_likely_code_block_delimiter(&chars, start_idx) {
328                continue;
329            }
330
331            // Skip over content until we find a matching sequence of backticks
332            let mut j = i;
333            let mut found_closing = false;
334
335            while j < chars.len() {
336                // Skip escaped backticks in the search too
337                if j > 0 && chars[j] == '`' && chars[j - 1] == '\\' {
338                    j += 1;
339                    continue;
340                }
341
342                if chars[j] == '`' {
343                    let mut closing_count = 1;
344                    let potential_end = j;
345
346                    // Count consecutive backticks
347                    j += 1;
348                    while j < chars.len() && chars[j] == '`' {
349                        closing_count += 1;
350                        j += 1;
351                    }
352
353                    // If we found a matching sequence, record the span
354                    if closing_count == backtick_count {
355                        // Convert from character indices to byte indices
356                        let start_byte = chars[..start_idx].iter().map(|c| c.len_utf8()).sum();
357                        let end_byte = chars[..potential_end + closing_count]
358                            .iter()
359                            .map(|c| c.len_utf8())
360                            .sum();
361
362                        spans.push((start_byte, end_byte));
363                        i = j; // Resume search after this span
364                        found_closing = true;
365                        break;
366                    }
367                }
368
369                j += 1;
370            }
371
372            if !found_closing {
373                // If we didn't find a matching sequence, continue from where we left off
374                continue;
375            }
376        } else {
377            i += 1;
378        }
379    }
380
381    spans
382}
383
// Helper function to determine if a backtick sequence is likely a code block delimiter.
//
// `chars` is the whole document and `start_idx` the index of the first backtick of the run.
// The run is considered a fence when it is at least three backticks long and everything
// between the start of its line (or of the document) and the run is whitespace.
fn is_likely_code_block_delimiter(chars: &[char], start_idx: usize) -> bool {
    // Length of the backtick run; an out-of-range start simply yields zero.
    let run_len = chars
        .get(start_idx..)
        .map_or(0, |rest| rest.iter().take_while(|&&c| c == '`').count());

    if run_len < 3 {
        // Not enough backticks for a code block
        return false;
    }

    // Walk back to the previous newline, or to the very first character of the document,
    // and require that only whitespace precedes the run on its line.
    chars[..start_idx]
        .iter()
        .rev()
        .take_while(|&&c| c != '\n')
        .all(|c| c.is_whitespace())
}
416
/// Code block style selection for MD046.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CodeBlockStyle {
    /// Follow whichever style the first code block uses (the default)
    #[default]
    Consistent,
    /// Code blocks written with 4 spaces of indentation
    Indented,
    /// Code blocks delimited by backtick or tilde fences
    Fenced,
}

impl fmt::Display for CodeBlockStyle {
    /// Writes the lowercase name of the style: `consistent`, `indented` or `fenced`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            CodeBlockStyle::Consistent => "consistent",
            CodeBlockStyle::Indented => "indented",
            CodeBlockStyle::Fenced => "fenced",
        };
        f.write_str(name)
    }
}
438
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `CodeBlockUtils::is_in_code_block` against one expected flag per line index.
    fn check_in_block(content: &str, expected: &[bool]) {
        for (line_num, &want) in expected.iter().enumerate() {
            assert_eq!(
                CodeBlockUtils::is_in_code_block(content, line_num),
                want,
                "line {line_num} of {content:?}"
            );
        }
    }

    /// Checks a line predicate against inputs that must match and inputs that must not.
    fn check_predicate(predicate: fn(&str) -> bool, accepted: &[&str], rejected: &[&str]) {
        for line in accepted {
            assert!(predicate(line), "expected a match for {line:?}");
        }
        for line in rejected {
            assert!(!predicate(line), "expected no match for {line:?}");
        }
    }

    #[test]
    fn test_is_in_code_block() {
        // Backtick fence: the closing fence line ends the block.
        check_in_block(
            "Normal text\n```rust\nlet x = 1;\n```\nMore text",
            &[false, true, true, false, false],
        );

        // Alternate (tilde) fence: the closing fence line ends the block.
        check_in_block("Text\n~~~\ncode\n~~~\nEnd", &[false, true, true, false, false]);

        // Indented code block
        check_in_block("Normal\n    indented code\nNormal", &[false, true, false]);

        // Out of bounds
        assert!(!CodeBlockUtils::is_in_code_block("test", 10));
    }

    #[test]
    fn test_is_code_block_delimiter() {
        check_predicate(
            CodeBlockUtils::is_code_block_delimiter,
            &["```", "```rust", "  ```", "~~~", "~~~python"],
            &["Normal text", "``", "~", ""],
        );
    }

    #[test]
    fn test_is_code_block_start() {
        check_predicate(
            CodeBlockUtils::is_code_block_start,
            &["```", "```rust", "~~~", "~~~python", "  ```"],
            &["Normal text", ""],
        );
    }

    #[test]
    fn test_is_code_block_end() {
        check_predicate(
            CodeBlockUtils::is_code_block_end,
            &["```", "~~~", "  ```", "```  "],
            // Language specifiers make it a start, not an end
            &["```rust", "~~~python", "Normal text"],
        );
    }

    #[test]
    fn test_is_indented_code_block() {
        check_predicate(
            CodeBlockUtils::is_indented_code_block,
            &[
                "    code",
                "        more indented",
                // Tab expansion per CommonMark: tabs expand to the next tab stop (4, 8, 12, ...)
                "\tcode",     // tab -> column 4
                "\t\tcode",   // 2 tabs -> column 8
                "  \tcode",   // 2 spaces + tab -> column 4
                " \tcode",    // 1 space + tab -> column 4
                "   \tcode",  // 3 spaces + tab -> column 4
            ],
            &[
                "   code", // Only 3 spaces
                "normal text",
                "",
            ],
        );
    }

    #[test]
    fn test_get_language_specifier() {
        let cases: [(&str, Option<&str>); 9] = [
            ("```rust", Some("rust")),
            ("~~~python", Some("python")),
            ("```javascript", Some("javascript")),
            ("  ```rust", Some("rust")),
            ("```rust ignore", Some("rust ignore")),
            ("```", None),
            ("~~~", None),
            ("Normal text", None),
            ("", None),
        ];

        for (line, expected) in cases {
            assert_eq!(
                CodeBlockUtils::get_language_specifier(line).as_deref(),
                expected,
                "input {line:?}"
            );
        }
    }

    #[test]
    fn test_identify_code_block_lines() {
        let cases: [(&str, Vec<bool>); 4] = [
            // Backtick fence: both delimiters are part of the block
            (
                "Normal text\n```rust\nlet x = 1;\n```\nMore text",
                vec![false, true, true, true, false],
            ),
            // Alternate fence
            ("Text\n~~~\ncode\n~~~\nEnd", vec![false, true, true, true, false]),
            // Indented code
            ("Normal\n    code\n    more code\nNormal", vec![false, true, true, false]),
            // List items should not be treated as code
            ("List:\n    * Item 1\n    * Item 2", vec![false, false, false]),
        ];

        for (content, expected) in cases {
            assert_eq!(
                CodeBlockUtils::identify_code_block_lines(content),
                expected,
                "content {content:?}"
            );
        }
    }

    #[test]
    fn test_code_block_state_enum() {
        assert_eq!(CodeBlockState::None, CodeBlockState::None);
        assert_eq!(CodeBlockState::Fenced, CodeBlockState::Fenced);
        assert_eq!(CodeBlockState::Indented, CodeBlockState::Indented);
        assert_ne!(CodeBlockState::None, CodeBlockState::Fenced);
    }

    #[test]
    fn test_code_block_info() {
        let info = CodeBlockInfo::new("Normal\n```\ncode\n```\nText");

        let expected = [false, true, true, true, false];
        for (line, &want) in expected.iter().enumerate() {
            assert_eq!(info.is_in_code_block(line), want, "line {line}");
        }

        assert!(info.has_code_blocks());

        // Out of bounds
        assert!(!info.is_in_code_block(100));
    }

    #[test]
    fn test_code_block_info_code_spans() {
        let info = CodeBlockInfo::new("Text with `inline code` here");

        assert!(info.has_code_spans());
        assert!(!info.has_code_blocks());

        let cases = [
            (11, true),  // Start of `inline
            (15, true),  // Inside inline code
            (5, false),  // Before code span
            (25, false), // After code span
        ];
        for (column, want) in cases {
            assert_eq!(info.is_in_code_span(0, column), want, "column {column}");
        }
    }

    #[test]
    fn test_compute_code_blocks() {
        let states = compute_code_blocks("Normal\n```\ncode\n```\n    indented");

        assert_eq!(
            &states[..5],
            &[
                CodeBlockState::None,
                CodeBlockState::Fenced,
                CodeBlockState::Fenced,
                CodeBlockState::Fenced,
                CodeBlockState::Indented,
            ]
        );
    }

    #[test]
    fn test_compute_code_spans() {
        let content = "Text `code` and ``double`` backticks";
        let spans = compute_code_spans(content);

        assert_eq!(spans.len(), 2);
        let (first_start, first_end) = spans[0];
        let (second_start, second_end) = spans[1];
        assert_eq!(&content[first_start..first_end], "`code`");
        assert_eq!(&content[second_start..second_end], "``double``");

        // Escaped backticks do not open a span
        let escaped = r"Text \`not code\` but `real code`";
        let escaped_spans = compute_code_spans(escaped);
        assert_eq!(escaped_spans.len(), 1);
        let (start, end) = escaped_spans[0];
        assert!(escaped[start..end].contains("real code"));
    }

    #[test]
    fn test_code_block_style() {
        let names = [
            (CodeBlockStyle::Fenced, "fenced"),
            (CodeBlockStyle::Indented, "indented"),
            (CodeBlockStyle::Consistent, "consistent"),
        ];
        for (style, name) in names {
            assert_eq!(style.to_string(), name);
        }

        assert_eq!(CodeBlockStyle::default(), CodeBlockStyle::Consistent);
    }

    #[test]
    fn test_nested_code_blocks() {
        // Nested code blocks don't exist in markdown, but test edge cases:
        // the first fence opens a block, the second closes it, the third opens a new one.
        let flags = CodeBlockUtils::identify_code_block_lines("```\n```\ncode\n```\n```");
        assert_eq!(flags, vec![true, true, false, true, true]);
    }

    #[test]
    fn test_unicode_content() {
        let flags = CodeBlockUtils::identify_code_block_lines("```rust\nlet 你好 = \"世界\";\n```");
        assert_eq!(flags, vec![true, true, true]);

        assert_eq!(
            CodeBlockUtils::get_language_specifier("```🦀"),
            Some("🦀".to_string())
        );
    }

    #[test]
    fn test_edge_cases() {
        // Empty content
        assert_eq!(CodeBlockUtils::identify_code_block_lines(""), Vec::<bool>::new());
        assert!(!CodeBlockUtils::is_in_code_block("", 0));

        // Just delimiters
        for fence in ["```", "~~~"] {
            assert_eq!(CodeBlockUtils::identify_code_block_lines(fence), vec![true]);
        }

        // Mixed fence types (should not close each other)
        let mixed = CodeBlockUtils::identify_code_block_lines("```\ncode\n~~~\nmore\n```");
        assert_eq!(mixed, vec![true, true, true, true, true]);
    }
}
699}