rumdl_lib/rules/
code_block_utils.rs

1use lazy_static::lazy_static;
2use regex::Regex;
3use std::fmt;
4
lazy_static! {
    // Standard code block detection patterns
    //
    // All patterns are anchored at the start of the text they are applied to; the fence and list
    // patterns are also anchored at the end, so they are meant to be matched against one line.

    // Optional leading whitespace, exactly three backticks, then any text that contains no
    // backtick, CR or LF (the info string). A bare "```" line matches this pattern as well as
    // FENCED_CODE_BLOCK_END.
    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap();
    // Optional leading whitespace, exactly three backticks, then only whitespace.
    static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)```\s*$").unwrap();
    // Same as FENCED_CODE_BLOCK_START, but for tilde fences (the info string may not contain `~`).
    static ref ALTERNATE_FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap();
    // Same as FENCED_CODE_BLOCK_END, but for tilde fences.
    static ref ALTERNATE_FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)~~~\s*$").unwrap();
    // Four or more whitespace characters at the start of the line. Tabs count as one character
    // each here, so callers that want "1 tab = 4 spaces" must expand tabs before matching.
    static ref INDENTED_CODE_BLOCK: Regex = Regex::new(r"^(\s{4,})").unwrap();
    // Optional indentation followed by a bullet (`*`, `+`, `-`) or digits plus `.` / `)`.
    // Whitespace after the marker is optional, so text such as "-foo" or "1.5" also matches.
    static ref LIST_ITEM_RE: Regex = Regex::new(r"^(\s*)([*+-]|\d+[.)])(\s*)(.*)$").unwrap();
}
14
15/// Utility functions for detecting and handling code blocks in Markdown documents
16pub struct CodeBlockUtils;
17
18impl CodeBlockUtils {
19    /// Check if a line is inside a code block
20    pub fn is_in_code_block(content: &str, line_num: usize) -> bool {
21        let lines: Vec<&str> = content.lines().collect();
22        if line_num >= lines.len() {
23            return false;
24        }
25
26        let mut in_fenced_code = false;
27        let mut in_alternate_fenced = false;
28
29        for (i, line) in lines.iter().enumerate() {
30            if i > line_num {
31                break;
32            }
33
34            if FENCED_CODE_BLOCK_START.is_match(line) {
35                in_fenced_code = !in_fenced_code;
36            } else if FENCED_CODE_BLOCK_END.is_match(line) && in_fenced_code {
37                in_fenced_code = false;
38            } else if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
39                in_alternate_fenced = !in_alternate_fenced;
40            } else if ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) && in_alternate_fenced {
41                in_alternate_fenced = false;
42            }
43        }
44
45        // Check if the current line is indented as code block
46        if line_num < lines.len() && Self::is_indented_code_block(lines[line_num]) {
47            return true;
48        }
49
50        // Return true if we're in any type of code block
51        in_fenced_code || in_alternate_fenced
52    }
53
54    /// Check if a line is a code block delimiter (start or end)
55    pub fn is_code_block_delimiter(line: &str) -> bool {
56        FENCED_CODE_BLOCK_START.is_match(line)
57            || FENCED_CODE_BLOCK_END.is_match(line)
58            || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
59            || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
60    }
61
62    /// Check if a line is the start of a code block
63    pub fn is_code_block_start(line: &str) -> bool {
64        FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
65    }
66
67    /// Check if a line is the end of a code block
68    pub fn is_code_block_end(line: &str) -> bool {
69        FENCED_CODE_BLOCK_END.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
70    }
71
72    /// Check if a line is an indented code block
73    pub fn is_indented_code_block(line: &str) -> bool {
74        // Convert tabs to spaces (1 tab = 4 spaces) for proper indentation checking
75        let expanded_line = line.replace('\t', "    ");
76        INDENTED_CODE_BLOCK.is_match(&expanded_line)
77    }
78
79    /// Extracts the language specifier from a fenced code block start line
80    ///
81    /// This function parses the line that starts a fenced code block (using either ``` or ~~~)
82    /// and extracts the language specifier that follows the fence markers.
83    ///
84    /// # Parameters
85    /// * `line` - The line of text that potentially contains a code block start with language specifier
86    ///
87    /// # Returns
88    /// * `Some(String)` - The language specifier if found
89    /// * `None` - If the line is not a code block start or has no language specifier
90    ///
91    /// # Examples
92    /// ```
93    /// use rumdl_lib::rules::code_block_utils::CodeBlockUtils;
94    ///
95    /// let specifier = CodeBlockUtils::get_language_specifier("```rust");
96    /// assert_eq!(specifier, Some("rust".to_string()));
97    ///
98    /// let specifier = CodeBlockUtils::get_language_specifier("~~~python");
99    /// assert_eq!(specifier, Some("python".to_string()));
100    ///
101    /// let specifier = CodeBlockUtils::get_language_specifier("```");
102    /// assert_eq!(specifier, None);
103    /// ```
104    pub fn get_language_specifier(line: &str) -> Option<String> {
105        if FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
106            let trimmed = line.trim_start();
107            let after_fence = &trimmed[3..].trim_start();
108            if !after_fence.is_empty() {
109                return Some(after_fence.to_string());
110            }
111        }
112        None
113    }
114
115    /// Identify which lines in the content are in code blocks
116    ///
117    /// This function analyzes Markdown content and determines which lines are part of code blocks,
118    /// including both fenced code blocks (``` or ~~~) and indented code blocks.
119    ///
120    /// # Algorithm
121    /// - Iterates through each line of content
122    /// - Tracks state for fenced code blocks (toggled by fence delimiters)
123    /// - Detects indented code blocks (4 spaces or 1 tab)
124    /// - Handles nested code blocks appropriately
125    ///
126    /// # Parameters
127    /// * `content` - The full Markdown content to analyze
128    ///
129    /// # Returns
130    /// A vector of boolean values with the same length as the number of lines in the input content.
131    /// Each element indicates whether the corresponding line is inside a code block:
132    /// * `true` - The line is inside a code block
133    /// * `false` - The line is not inside a code block
134    ///
135    /// # Examples
136    /// ```
137    /// use rumdl_lib::rules::code_block_utils::CodeBlockUtils;
138    ///
139    /// let content = "Some text\n```rust\nlet x = 1;\n```\nMore text";
140    /// let in_code_block = CodeBlockUtils::identify_code_block_lines(content);
141    /// assert_eq!(in_code_block, vec![false, true, true, true, false]);
142    /// ```
143    pub fn identify_code_block_lines(content: &str) -> Vec<bool> {
144        let lines: Vec<&str> = content.lines().collect();
145        let mut in_code_block = vec![false; lines.len()];
146
147        let mut in_fenced_code = false;
148        let mut in_alternate_fenced = false;
149
150        for (i, line) in lines.iter().enumerate() {
151            // Quick check for code fence markers with literal prefixes
152            let trimmed = line.trim_start();
153
154            if trimmed.starts_with("```") {
155                if FENCED_CODE_BLOCK_START.is_match(line) {
156                    in_fenced_code = !in_fenced_code;
157                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
158                } else if in_fenced_code && FENCED_CODE_BLOCK_END.is_match(line) {
159                    in_fenced_code = false;
160                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
161                }
162            } else if trimmed.starts_with("~~~") {
163                if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
164                    in_alternate_fenced = !in_alternate_fenced;
165                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
166                } else if in_alternate_fenced && ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) {
167                    in_alternate_fenced = false;
168                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
169                }
170            }
171
172            // If we're in a code fence, mark the line
173            if in_fenced_code || in_alternate_fenced {
174                in_code_block[i] = true;
175            } else if !in_code_block[i] {
176                // Check for indented code blocks only if not already marked
177                // Do not mark as code block if the line is a list item
178                if (line.starts_with("    ") || INDENTED_CODE_BLOCK.is_match(line)) && !LIST_ITEM_RE.is_match(line) {
179                    in_code_block[i] = true;
180                }
181            }
182        }
183
184        in_code_block
185    }
186}
187
// Cached regex patterns for better performance
lazy_static! {
    // Three backticks or three tildes at the very start of the text (no leading whitespace allowed).
    static ref FENCED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(?:```|~~~)").unwrap();
    // Four or more whitespace characters at the start of the text; tabs count as one character,
    // so callers expand tabs before matching when a tab should count as four columns.
    static ref INDENTED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(\s{4,})").unwrap();
    // One or more consecutive backticks anywhere in the text.
    // NOTE(review): no use of this pattern is visible in this file — confirm it is still needed.
    static ref BACKTICK_PATTERN: Regex = Regex::new(r"(`+)").unwrap();
}
194
/// Tracks which lines are inside code blocks and their types
///
/// Equality is total, so the type derives `Eq` alongside `PartialEq` (matching
/// `CodeBlockStyle` in this module).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CodeBlockState {
    /// The line is not part of any code block.
    None,
    /// The line belongs to a fenced code block (fence lines included).
    Fenced,
    /// The line belongs to an indented code block.
    Indented,
}
202
203/// Structure to hold pre-computed code block information
204#[derive(Debug)]
205pub struct CodeBlockInfo {
206    /// Whether each line is in a code block, and which type
207    pub block_states: Vec<CodeBlockState>,
208    /// Positions of code spans in the text (start, end)
209    pub code_spans: Vec<(usize, usize)>,
210    /// The original content used to create this info
211    content: String,
212}
213
214impl CodeBlockInfo {
215    /// Create a new CodeBlockInfo by analyzing the content
216    pub fn new(content: &str) -> Self {
217        let block_states = compute_code_blocks(content);
218        let code_spans = compute_code_spans(content);
219
220        CodeBlockInfo {
221            block_states,
222            code_spans,
223            content: content.to_string(),
224        }
225    }
226
227    /// Check if a line is inside a code block
228    pub fn is_in_code_block(&self, line_index: usize) -> bool {
229        if line_index < self.block_states.len() {
230            self.block_states[line_index] != CodeBlockState::None
231        } else {
232            false
233        }
234    }
235
236    /// Check if a position is inside a code span
237    pub fn is_in_code_span(&self, line_index: usize, column_index: usize) -> bool {
238        // Calculate absolute position (this assumes content is ASCII-only)
239        let mut position = 0;
240        let content_lines: Vec<&str> = self.content.lines().collect();
241
242        for i in 0..line_index {
243            if i < content_lines.len() {
244                position += content_lines[i].len() + 1; // +1 for newline
245            }
246        }
247
248        if line_index < content_lines.len() {
249            // Add column position
250            let line = content_lines[line_index];
251            if column_index < line.len() {
252                position += column_index;
253
254                // Check if position is in any code span
255                for &(start, end) in &self.code_spans {
256                    if position >= start && position <= end {
257                        return true;
258                    }
259                }
260            }
261        }
262
263        false
264    }
265
266    /// Quick check if content contains any code blocks
267    pub fn has_code_blocks(&self) -> bool {
268        self.block_states.iter().any(|state| *state != CodeBlockState::None)
269    }
270
271    /// Quick check if content contains any code spans
272    pub fn has_code_spans(&self) -> bool {
273        !self.code_spans.is_empty()
274    }
275}
276
277/// Compute which lines are in code blocks and what type
278pub fn compute_code_blocks(content: &str) -> Vec<CodeBlockState> {
279    let mut in_fenced_block = false;
280    let mut result = Vec::new();
281    let mut fence_marker = "";
282
283    for line in content.lines() {
284        if in_fenced_block {
285            if line.trim().starts_with(fence_marker) {
286                in_fenced_block = false;
287                result.push(CodeBlockState::Fenced); // The closing fence is still part of the block
288            } else {
289                result.push(CodeBlockState::Fenced);
290            }
291        } else if FENCED_CODE_BLOCK_PATTERN.is_match(line) {
292            in_fenced_block = true;
293            fence_marker = if line.trim().starts_with("```") { "```" } else { "~~~" };
294            result.push(CodeBlockState::Fenced); // The opening fence is part of the block
295        } else if !line.trim().is_empty() {
296            // Convert tabs to spaces for proper indentation checking
297            let expanded_line = line.replace('\t', "    ");
298            if INDENTED_CODE_BLOCK_PATTERN.is_match(&expanded_line) {
299                result.push(CodeBlockState::Indented);
300            } else {
301                result.push(CodeBlockState::None);
302            }
303        } else {
304            result.push(CodeBlockState::None);
305        }
306    }
307
308    result
309}
310
311/// Compute positions of code spans in the text
312pub fn compute_code_spans(content: &str) -> Vec<(usize, usize)> {
313    let mut spans = Vec::new();
314
315    // Simplify by using a safer character-based approach
316    let chars: Vec<char> = content.chars().collect();
317    let mut i = 0;
318
319    while i < chars.len() {
320        // Skip escaped backticks
321        if i > 0 && chars[i] == '`' && chars[i - 1] == '\\' {
322            i += 1;
323            continue;
324        }
325
326        // Look for backtick sequences
327        if chars[i] == '`' {
328            let mut backtick_count = 1;
329            let start_idx = i;
330
331            // Count consecutive backticks
332            i += 1;
333            while i < chars.len() && chars[i] == '`' {
334                backtick_count += 1;
335                i += 1;
336            }
337
338            // Skip this if it looks like a code block delimiter
339            // This prevents confusion between code spans and code blocks
340            if is_likely_code_block_delimiter(&chars, start_idx) {
341                continue;
342            }
343
344            // Skip over content until we find a matching sequence of backticks
345            let mut j = i;
346            let mut found_closing = false;
347
348            while j < chars.len() {
349                // Skip escaped backticks in the search too
350                if j > 0 && chars[j] == '`' && chars[j - 1] == '\\' {
351                    j += 1;
352                    continue;
353                }
354
355                if chars[j] == '`' {
356                    let mut closing_count = 1;
357                    let potential_end = j;
358
359                    // Count consecutive backticks
360                    j += 1;
361                    while j < chars.len() && chars[j] == '`' {
362                        closing_count += 1;
363                        j += 1;
364                    }
365
366                    // If we found a matching sequence, record the span
367                    if closing_count == backtick_count {
368                        // Convert from character indices to byte indices
369                        let start_byte = chars[..start_idx].iter().map(|c| c.len_utf8()).sum();
370                        let end_byte = chars[..potential_end + closing_count]
371                            .iter()
372                            .map(|c| c.len_utf8())
373                            .sum();
374
375                        spans.push((start_byte, end_byte));
376                        i = j; // Resume search after this span
377                        found_closing = true;
378                        break;
379                    }
380                }
381
382                j += 1;
383            }
384
385            if !found_closing {
386                // If we didn't find a matching sequence, continue from where we left off
387                continue;
388            }
389        } else {
390            i += 1;
391        }
392    }
393
394    spans
395}
396
/// Helper function to determine if a backtick sequence is likely a code block delimiter
///
/// Returns `true` when the run of backticks starting at `start_idx` is at least three long and
/// is preceded on its own line by nothing but whitespace. The line's first character is checked
/// as well, including when it is the very first character of the input.
///
/// Returns `false` when `start_idx` is out of range or does not point at a backtick.
fn is_likely_code_block_delimiter(chars: &[char], start_idx: usize) -> bool {
    // Count the backticks
    let fence_len = chars
        .iter()
        .skip(start_idx)
        .take_while(|&&c| c == '`')
        .count();

    if fence_len < 3 {
        // Not enough backticks for a code block
        return false;
    }

    // `fence_len >= 3` guarantees `start_idx < chars.len()`, so the slice below is in bounds.
    // Walk back to the start of the line; any non-whitespace character disqualifies the run.
    chars[..start_idx]
        .iter()
        .rev()
        .take_while(|&&c| c != '\n')
        .all(|c| c.is_whitespace())
}
429
/// Code block style selection (MD046).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CodeBlockStyle {
    /// Follow whichever style the first code block uses. This is the default.
    #[default]
    Consistent,
    /// Code blocks written with four-space indentation.
    Indented,
    /// Code blocks delimited by ``` or ~~~ fences.
    Fenced,
}

impl fmt::Display for CodeBlockStyle {
    /// Writes the lowercase name of the style: `consistent`, `indented` or `fenced`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            CodeBlockStyle::Consistent => "consistent",
            CodeBlockStyle::Indented => "indented",
            CodeBlockStyle::Fenced => "fenced",
        };
        f.write_str(label)
    }
}
451
// Unit tests for the code block helpers in this module. Line indices passed to the helpers
// are zero-based throughout.
#[cfg(test)]
mod tests {
    use super::*;

    // `is_in_code_block` reports the opening fence and block content as inside, and the
    // closing fence as outside; indented lines count as code on their own.
    #[test]
    fn test_is_in_code_block() {
        let content = "Normal text
```rust
let x = 1;
```
More text";

        assert!(!CodeBlockUtils::is_in_code_block(content, 0));
        assert!(CodeBlockUtils::is_in_code_block(content, 1));
        assert!(CodeBlockUtils::is_in_code_block(content, 2));
        assert!(!CodeBlockUtils::is_in_code_block(content, 3)); // Closing ``` ends the block
        assert!(!CodeBlockUtils::is_in_code_block(content, 4));

        // Test with alternate fence
        let content2 = "Text\n~~~\ncode\n~~~\nEnd";
        assert!(!CodeBlockUtils::is_in_code_block(content2, 0));
        assert!(CodeBlockUtils::is_in_code_block(content2, 1));
        assert!(CodeBlockUtils::is_in_code_block(content2, 2));
        assert!(!CodeBlockUtils::is_in_code_block(content2, 3)); // Closing ~~~ ends the block
        assert!(!CodeBlockUtils::is_in_code_block(content2, 4));

        // Test indented code block
        let content3 = "Normal\n    indented code\nNormal";
        assert!(!CodeBlockUtils::is_in_code_block(content3, 0));
        assert!(CodeBlockUtils::is_in_code_block(content3, 1));
        assert!(!CodeBlockUtils::is_in_code_block(content3, 2));

        // Test out of bounds
        assert!(!CodeBlockUtils::is_in_code_block("test", 10));
    }

    // Any three-backtick or three-tilde fence line is a delimiter, with or without an info
    // string or leading whitespace; shorter runs and ordinary text are not.
    #[test]
    fn test_is_code_block_delimiter() {
        assert!(CodeBlockUtils::is_code_block_delimiter("```"));
        assert!(CodeBlockUtils::is_code_block_delimiter("```rust"));
        assert!(CodeBlockUtils::is_code_block_delimiter("  ```"));
        assert!(CodeBlockUtils::is_code_block_delimiter("~~~"));
        assert!(CodeBlockUtils::is_code_block_delimiter("~~~python"));

        assert!(!CodeBlockUtils::is_code_block_delimiter("Normal text"));
        assert!(!CodeBlockUtils::is_code_block_delimiter("``"));
        assert!(!CodeBlockUtils::is_code_block_delimiter("~"));
        assert!(!CodeBlockUtils::is_code_block_delimiter(""));
    }

    // A start line may be a bare fence or a fence followed by an info string.
    #[test]
    fn test_is_code_block_start() {
        assert!(CodeBlockUtils::is_code_block_start("```"));
        assert!(CodeBlockUtils::is_code_block_start("```rust"));
        assert!(CodeBlockUtils::is_code_block_start("~~~"));
        assert!(CodeBlockUtils::is_code_block_start("~~~python"));
        assert!(CodeBlockUtils::is_code_block_start("  ```"));

        assert!(!CodeBlockUtils::is_code_block_start("Normal text"));
        assert!(!CodeBlockUtils::is_code_block_start(""));
    }

    // An end line is a bare fence, optionally surrounded by whitespace.
    #[test]
    fn test_is_code_block_end() {
        assert!(CodeBlockUtils::is_code_block_end("```"));
        assert!(CodeBlockUtils::is_code_block_end("~~~"));
        assert!(CodeBlockUtils::is_code_block_end("  ```"));
        assert!(CodeBlockUtils::is_code_block_end("```  "));

        // Language specifiers make it a start, not end
        assert!(!CodeBlockUtils::is_code_block_end("```rust"));
        assert!(!CodeBlockUtils::is_code_block_end("~~~python"));
        assert!(!CodeBlockUtils::is_code_block_end("Normal text"));
    }

    // Indentation of four or more columns marks indented code; each tab counts as four.
    #[test]
    fn test_is_indented_code_block() {
        assert!(CodeBlockUtils::is_indented_code_block("    code"));
        assert!(CodeBlockUtils::is_indented_code_block("        more indented"));
        // Tabs should be treated as 4 spaces each
        assert!(CodeBlockUtils::is_indented_code_block("\tcode")); // 1 tab = 4 spaces
        assert!(CodeBlockUtils::is_indented_code_block("\t\tcode")); // 2 tabs = 8 spaces
        assert!(CodeBlockUtils::is_indented_code_block("  \tcode")); // 2 spaces + 1 tab = 6 spaces

        assert!(!CodeBlockUtils::is_indented_code_block("   code")); // Only 3 spaces
        assert!(!CodeBlockUtils::is_indented_code_block("normal text"));
        assert!(!CodeBlockUtils::is_indented_code_block(""));
    }

    // The language specifier is the text after the fence; bare fences and non-fence lines
    // yield `None`.
    #[test]
    fn test_get_language_specifier() {
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```rust"),
            Some("rust".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("~~~python"),
            Some("python".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```javascript"),
            Some("javascript".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("  ```rust"),
            Some("rust".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```rust ignore"),
            Some("rust ignore".to_string())
        );

        assert_eq!(CodeBlockUtils::get_language_specifier("```"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier("~~~"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier("Normal text"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier(""), None);
    }

    // Unlike `is_in_code_block`, `identify_code_block_lines` marks both fence lines as code.
    #[test]
    fn test_identify_code_block_lines() {
        let content = "Normal text
```rust
let x = 1;
```
More text";

        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![false, true, true, true, false]);

        // Test with alternate fence
        let content2 = "Text\n~~~\ncode\n~~~\nEnd";
        let result2 = CodeBlockUtils::identify_code_block_lines(content2);
        assert_eq!(result2, vec![false, true, true, true, false]);

        // Test with indented code
        let content3 = "Normal\n    code\n    more code\nNormal";
        let result3 = CodeBlockUtils::identify_code_block_lines(content3);
        assert_eq!(result3, vec![false, true, true, false]);

        // Test with list items (should not be treated as code)
        let content4 = "List:\n    * Item 1\n    * Item 2";
        let result4 = CodeBlockUtils::identify_code_block_lines(content4);
        assert_eq!(result4, vec![false, false, false]);
    }

    // Sanity check of the derived equality on `CodeBlockState`.
    #[test]
    fn test_code_block_state_enum() {
        assert_eq!(CodeBlockState::None, CodeBlockState::None);
        assert_eq!(CodeBlockState::Fenced, CodeBlockState::Fenced);
        assert_eq!(CodeBlockState::Indented, CodeBlockState::Indented);
        assert_ne!(CodeBlockState::None, CodeBlockState::Fenced);
    }

    // `CodeBlockInfo` counts both fence lines as part of the fenced block.
    #[test]
    fn test_code_block_info() {
        let content = "Normal\n```\ncode\n```\nText";
        let info = CodeBlockInfo::new(content);

        assert!(!info.is_in_code_block(0));
        assert!(info.is_in_code_block(1));
        assert!(info.is_in_code_block(2));
        assert!(info.is_in_code_block(3));
        assert!(!info.is_in_code_block(4));

        assert!(info.has_code_blocks());

        // Test out of bounds
        assert!(!info.is_in_code_block(100));
    }

    // The opening backtick of the span sits at column 10 of the sample line.
    #[test]
    fn test_code_block_info_code_spans() {
        let content = "Text with `inline code` here";
        let info = CodeBlockInfo::new(content);

        assert!(info.has_code_spans());
        assert!(!info.has_code_blocks());

        // Test position inside code span
        assert!(info.is_in_code_span(0, 11)); // First character after the opening backtick
        assert!(info.is_in_code_span(0, 15)); // Inside inline code
        assert!(!info.is_in_code_span(0, 5)); // Before code span
        assert!(!info.is_in_code_span(0, 25)); // After code span
    }

    // Fence lines and their content are `Fenced`; a four-space line outside a fence is `Indented`.
    #[test]
    fn test_compute_code_blocks() {
        let content = "Normal\n```\ncode\n```\n    indented";
        let states = compute_code_blocks(content);

        assert_eq!(states[0], CodeBlockState::None);
        assert_eq!(states[1], CodeBlockState::Fenced);
        assert_eq!(states[2], CodeBlockState::Fenced);
        assert_eq!(states[3], CodeBlockState::Fenced);
        assert_eq!(states[4], CodeBlockState::Indented);
    }

    // Spans are byte ranges that include their backtick delimiters.
    #[test]
    fn test_compute_code_spans() {
        let content = "Text `code` and ``double`` backticks";
        let spans = compute_code_spans(content);

        assert_eq!(spans.len(), 2);
        // First span: `code`
        assert_eq!(&content[spans[0].0..spans[0].1], "`code`");
        // Second span: ``double``
        assert_eq!(&content[spans[1].0..spans[1].1], "``double``");

        // Test escaped backticks
        let content2 = r"Text \`not code\` but `real code`";
        let spans2 = compute_code_spans(content2);
        assert_eq!(spans2.len(), 1);
        assert!(content2[spans2[0].0..spans2[0].1].contains("real code"));
    }

    // `Display` yields the lowercase style name; the default style is `Consistent`.
    #[test]
    fn test_code_block_style() {
        assert_eq!(CodeBlockStyle::Fenced.to_string(), "fenced");
        assert_eq!(CodeBlockStyle::Indented.to_string(), "indented");
        assert_eq!(CodeBlockStyle::Consistent.to_string(), "consistent");

        assert_eq!(CodeBlockStyle::default(), CodeBlockStyle::Consistent);
    }

    // Back-to-back fences form two separate blocks with a plain line between them.
    #[test]
    fn test_nested_code_blocks() {
        // Nested code blocks don't exist in markdown, but test edge cases
        let content = "```\n```\ncode\n```\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        // First ``` starts a block, second ``` ends it, third starts new block
        assert_eq!(result, vec![true, true, false, true, true]);
    }

    // Non-ASCII text inside a block and in the info string is handled.
    #[test]
    fn test_unicode_content() {
        let content = "```rust\nlet 你好 = \"世界\";\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, true]);

        assert_eq!(CodeBlockUtils::get_language_specifier("```🦀"), Some("🦀".to_string()));
    }

    // Empty input, lone fences, and a tilde fence line inside a backtick block.
    #[test]
    fn test_edge_cases() {
        // Empty content
        assert_eq!(CodeBlockUtils::identify_code_block_lines(""), Vec::<bool>::new());
        assert!(!CodeBlockUtils::is_in_code_block("", 0));

        // Just delimiters
        assert_eq!(CodeBlockUtils::identify_code_block_lines("```"), vec![true]);
        assert_eq!(CodeBlockUtils::identify_code_block_lines("~~~"), vec![true]);

        // Mixed fence types (should not close each other)
        let content = "```\ncode\n~~~\nmore\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, true, true, true]);
    }
}