rumdl_lib/rules/
code_block_utils.rs

1use crate::utils::range_utils::LineIndex;
2use regex::Regex;
3use std::fmt;
4use std::sync::LazyLock;
5
6// Standard code block detection patterns
7static FENCED_CODE_BLOCK_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap());
8static FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)```\s*$").unwrap());
9static ALTERNATE_FENCED_CODE_BLOCK_START: LazyLock<Regex> =
10    LazyLock::new(|| Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap());
11static ALTERNATE_FENCED_CODE_BLOCK_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)~~~\s*$").unwrap());
12static INDENTED_CODE_BLOCK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s{4,})").unwrap());
13static LIST_ITEM_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*)([*+-]|\d+[.)])(\s*)(.*)$").unwrap());
14
/// Field-less namespace type whose associated functions detect and classify code blocks
/// in Markdown text
pub struct CodeBlockUtils;
17
18impl CodeBlockUtils {
19    /// Check if a line is inside a code block
20    pub fn is_in_code_block(content: &str, line_num: usize) -> bool {
21        let lines: Vec<&str> = content.lines().collect();
22        if line_num >= lines.len() {
23            return false;
24        }
25
26        let mut in_fenced_code = false;
27        let mut in_alternate_fenced = false;
28
29        for (i, line) in lines.iter().enumerate() {
30            if i > line_num {
31                break;
32            }
33
34            if FENCED_CODE_BLOCK_START.is_match(line) {
35                in_fenced_code = !in_fenced_code;
36            } else if FENCED_CODE_BLOCK_END.is_match(line) && in_fenced_code {
37                in_fenced_code = false;
38            } else if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
39                in_alternate_fenced = !in_alternate_fenced;
40            } else if ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) && in_alternate_fenced {
41                in_alternate_fenced = false;
42            }
43        }
44
45        // Check if the current line is indented as code block
46        if line_num < lines.len() && Self::is_indented_code_block(lines[line_num]) {
47            return true;
48        }
49
50        // Return true if we're in any type of code block
51        in_fenced_code || in_alternate_fenced
52    }
53
54    /// Check if a line is a code block delimiter (start or end)
55    pub fn is_code_block_delimiter(line: &str) -> bool {
56        FENCED_CODE_BLOCK_START.is_match(line)
57            || FENCED_CODE_BLOCK_END.is_match(line)
58            || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
59            || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
60    }
61
62    /// Check if a line is the start of a code block
63    pub fn is_code_block_start(line: &str) -> bool {
64        FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
65    }
66
67    /// Check if a line is the end of a code block
68    pub fn is_code_block_end(line: &str) -> bool {
69        FENCED_CODE_BLOCK_END.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
70    }
71
72    /// Check if a line is an indented code block
73    pub fn is_indented_code_block(line: &str) -> bool {
74        // Convert tabs to spaces (1 tab = 4 spaces) for proper indentation checking
75        let expanded_line = line.replace('\t', "    ");
76        INDENTED_CODE_BLOCK.is_match(&expanded_line)
77    }
78
79    /// Extracts the language specifier from a fenced code block start line
80    ///
81    /// This function parses the line that starts a fenced code block (using either ``` or ~~~)
82    /// and extracts the language specifier that follows the fence markers.
83    ///
84    /// # Parameters
85    /// * `line` - The line of text that potentially contains a code block start with language specifier
86    ///
87    /// # Returns
88    /// * `Some(String)` - The language specifier if found
89    /// * `None` - If the line is not a code block start or has no language specifier
90    ///
91    /// # Examples
92    /// ```
93    /// use rumdl_lib::rules::code_block_utils::CodeBlockUtils;
94    ///
95    /// let specifier = CodeBlockUtils::get_language_specifier("```rust");
96    /// assert_eq!(specifier, Some("rust".to_string()));
97    ///
98    /// let specifier = CodeBlockUtils::get_language_specifier("~~~python");
99    /// assert_eq!(specifier, Some("python".to_string()));
100    ///
101    /// let specifier = CodeBlockUtils::get_language_specifier("```");
102    /// assert_eq!(specifier, None);
103    /// ```
104    pub fn get_language_specifier(line: &str) -> Option<String> {
105        if FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
106            let trimmed = line.trim_start();
107            let after_fence = &trimmed[3..].trim_start();
108            if !after_fence.is_empty() {
109                return Some(after_fence.to_string());
110            }
111        }
112        None
113    }
114
115    /// Identify which lines in the content are in code blocks
116    ///
117    /// This function analyzes Markdown content and determines which lines are part of code blocks,
118    /// including both fenced code blocks (``` or ~~~) and indented code blocks.
119    ///
120    /// # Algorithm
121    /// - Iterates through each line of content
122    /// - Tracks state for fenced code blocks (toggled by fence delimiters)
123    /// - Detects indented code blocks (4 spaces or 1 tab)
124    /// - Handles nested code blocks appropriately
125    ///
126    /// # Parameters
127    /// * `content` - The full Markdown content to analyze
128    ///
129    /// # Returns
130    /// A vector of boolean values with the same length as the number of lines in the input content.
131    /// Each element indicates whether the corresponding line is inside a code block:
132    /// * `true` - The line is inside a code block
133    /// * `false` - The line is not inside a code block
134    ///
135    /// # Examples
136    /// ```
137    /// use rumdl_lib::rules::code_block_utils::CodeBlockUtils;
138    ///
139    /// let content = "Some text\n```rust\nlet x = 1;\n```\nMore text";
140    /// let in_code_block = CodeBlockUtils::identify_code_block_lines(content);
141    /// assert_eq!(in_code_block, vec![false, true, true, true, false]);
142    /// ```
143    pub fn identify_code_block_lines(content: &str) -> Vec<bool> {
144        let lines: Vec<&str> = content.lines().collect();
145        let mut in_code_block = vec![false; lines.len()];
146
147        let mut in_fenced_code = false;
148        let mut in_alternate_fenced = false;
149
150        for (i, line) in lines.iter().enumerate() {
151            // Quick check for code fence markers with literal prefixes
152            let trimmed = line.trim_start();
153
154            if trimmed.starts_with("```") {
155                if FENCED_CODE_BLOCK_START.is_match(line) {
156                    in_fenced_code = !in_fenced_code;
157                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
158                } else if in_fenced_code && FENCED_CODE_BLOCK_END.is_match(line) {
159                    in_fenced_code = false;
160                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
161                }
162            } else if trimmed.starts_with("~~~") {
163                if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
164                    in_alternate_fenced = !in_alternate_fenced;
165                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
166                } else if in_alternate_fenced && ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) {
167                    in_alternate_fenced = false;
168                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
169                }
170            }
171
172            // If we're in a code fence, mark the line
173            if in_fenced_code || in_alternate_fenced {
174                in_code_block[i] = true;
175            } else if !in_code_block[i] {
176                // Check for indented code blocks only if not already marked
177                // Do not mark as code block if the line is a list item
178                if (line.starts_with("    ") || INDENTED_CODE_BLOCK.is_match(line)) && !LIST_ITEM_RE.is_match(line) {
179                    in_code_block[i] = true;
180                }
181            }
182        }
183
184        in_code_block
185    }
186}
187
188// Cached regex patterns for better performance
189static FENCED_CODE_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(?:```|~~~)").unwrap());
190static INDENTED_CODE_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s{4,})").unwrap());
191
/// Classification of a single line with respect to code blocks
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CodeBlockState {
    /// The line is not part of a code block
    None,
    /// The line belongs to a fenced code block
    Fenced,
    /// The line belongs to an indented code block
    Indented,
}
199
200/// Structure to hold pre-computed code block information
201#[derive(Debug)]
202pub struct CodeBlockInfo<'a> {
203    /// Whether each line is in a code block, and which type
204    pub block_states: Vec<CodeBlockState>,
205    /// Positions of code spans in the text (start, end)
206    pub code_spans: Vec<(usize, usize)>,
207    /// The original content used to create this info
208    content: &'a str,
209    /// LineIndex for correct byte position calculations across all line ending types
210    line_index: LineIndex<'a>,
211}
212
213impl<'a> CodeBlockInfo<'a> {
214    /// Create a new CodeBlockInfo by analyzing the content
215    pub fn new(content: &'a str) -> Self {
216        let block_states = compute_code_blocks(content);
217        let code_spans = compute_code_spans(content);
218        let line_index = LineIndex::new(content);
219
220        CodeBlockInfo {
221            block_states,
222            code_spans,
223            content,
224            line_index,
225        }
226    }
227
228    /// Check if a line is inside a code block
229    pub fn is_in_code_block(&self, line_index: usize) -> bool {
230        if line_index < self.block_states.len() {
231            self.block_states[line_index] != CodeBlockState::None
232        } else {
233            false
234        }
235    }
236
237    /// Check if a position is inside a code span
238    pub fn is_in_code_span(&self, line_index: usize, column_index: usize) -> bool {
239        // Calculate absolute position using LineIndex for correct handling of all line ending types
240        let line_start = self
241            .line_index
242            .get_line_start_byte(line_index + 1)
243            .unwrap_or(self.content.len());
244        let position = line_start + column_index;
245
246        // Check if position is in any code span
247        for &(start, end) in &self.code_spans {
248            if position >= start && position <= end {
249                return true;
250            }
251        }
252
253        false
254    }
255
256    /// Quick check if content contains any code blocks
257    pub fn has_code_blocks(&self) -> bool {
258        self.block_states.iter().any(|state| *state != CodeBlockState::None)
259    }
260
261    /// Quick check if content contains any code spans
262    pub fn has_code_spans(&self) -> bool {
263        !self.code_spans.is_empty()
264    }
265}
266
267/// Compute which lines are in code blocks and what type
268pub fn compute_code_blocks(content: &str) -> Vec<CodeBlockState> {
269    let mut in_fenced_block = false;
270    let mut result = Vec::new();
271    let mut fence_marker = "";
272
273    for line in content.lines() {
274        if in_fenced_block {
275            if line.trim().starts_with(fence_marker) {
276                in_fenced_block = false;
277                result.push(CodeBlockState::Fenced); // The closing fence is still part of the block
278            } else {
279                result.push(CodeBlockState::Fenced);
280            }
281        } else if FENCED_CODE_BLOCK_PATTERN.is_match(line) {
282            in_fenced_block = true;
283            fence_marker = if line.trim().starts_with("```") { "```" } else { "~~~" };
284            result.push(CodeBlockState::Fenced); // The opening fence is part of the block
285        } else if !line.trim().is_empty() {
286            // Convert tabs to spaces for proper indentation checking
287            let expanded_line = line.replace('\t', "    ");
288            if INDENTED_CODE_BLOCK_PATTERN.is_match(&expanded_line) {
289                result.push(CodeBlockState::Indented);
290            } else {
291                result.push(CodeBlockState::None);
292            }
293        } else {
294            result.push(CodeBlockState::None);
295        }
296    }
297
298    result
299}
300
/// Compute the byte ranges of inline code spans in `content`
///
/// A span opens at a run of one or more backticks and closes at the next run of exactly the
/// same length. A backtick directly preceded by a backslash is skipped instead of starting a
/// delimiter run. A run of three or more backticks that is the first non-whitespace text on its
/// line is taken to be a code fence and is never used as a span opener. An opening run without
/// a matching closing run produces no span.
///
/// # Returns
/// `(start, end)` byte offsets into `content`, in order of appearance. `start` is the offset of
/// the first opening backtick and `end` is the offset just past the last closing backtick, so
/// `&content[start..end]` is the whole span including its delimiters.
pub fn compute_code_spans(content: &str) -> Vec<(usize, usize)> {
    let mut spans = Vec::new();

    // Scan by character, but translate to byte offsets through a table built once up front.
    // `byte_offsets[k]` is the byte offset of the k-th character; the extra final entry is
    // `content.len()` so an end index one past the last character can be looked up as well.
    let chars: Vec<char> = content.chars().collect();
    let byte_offsets: Vec<usize> = content
        .char_indices()
        .map(|(offset, _)| offset)
        .chain(std::iter::once(content.len()))
        .collect();

    let is_escaped = |idx: usize| idx > 0 && chars[idx - 1] == '\\';
    let run_len = |from: usize| chars[from..].iter().take_while(|&&c| c == '`').count();

    let mut i = 0;
    while i < chars.len() {
        // Ordinary characters and escaped backticks never open a span.
        if chars[i] != '`' || is_escaped(i) {
            i += 1;
            continue;
        }

        let open_start = i;
        let open_len = run_len(open_start);
        // Whatever happens next, scanning resumes after the opening run at the earliest.
        i = open_start + open_len;

        // Skip fence lines so code blocks are not mistaken for code spans.
        if is_likely_code_block_delimiter(&chars, open_start) {
            continue;
        }

        // Look for the next unescaped backtick run of exactly the same length.
        let mut j = i;
        while j < chars.len() {
            if chars[j] != '`' || is_escaped(j) {
                j += 1;
                continue;
            }

            let close_end = j + run_len(j);
            if close_end - j == open_len {
                spans.push((byte_offsets[open_start], byte_offsets[close_end]));
                i = close_end; // Resume the outer scan after this span
                break;
            }
            j = close_end;
        }
    }

    spans
}

// Helper function to determine if a backtick sequence is likely a code block delimiter:
// a run of at least three backticks starting at `start_idx` with nothing but whitespace between
// it and the start of its line. Returns false when `start_idx` is out of range.
fn is_likely_code_block_delimiter(chars: &[char], start_idx: usize) -> bool {
    let Some(tail) = chars.get(start_idx..) else {
        return false;
    };

    // Not enough backticks for a code block
    if tail.iter().take_while(|&&c| c == '`').count() < 3 {
        return false;
    }

    // Walk back to the previous newline (or the start of the text, including its very first
    // character) and require that everything in between is whitespace.
    chars[..start_idx]
        .iter()
        .rev()
        .take_while(|&&c| c != '\n')
        .all(|c| c.is_whitespace())
}
419
/// Code block style setting for MD046
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CodeBlockStyle {
    /// Follow whichever style the first code block uses (the default)
    #[default]
    Consistent,
    /// Code blocks written with 4-space indentation
    Indented,
    /// Code blocks delimited by ``` or ~~~ fences
    Fenced,
}
431
432impl fmt::Display for CodeBlockStyle {
433    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
434        match self {
435            CodeBlockStyle::Fenced => write!(f, "fenced"),
436            CodeBlockStyle::Indented => write!(f, "indented"),
437            CodeBlockStyle::Consistent => write!(f, "consistent"),
438        }
439    }
440}
441
// Unit tests for the code block and code span helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;

    // Backtick fences, tilde fences, indented code and an out-of-range line index.
    #[test]
    fn test_is_in_code_block() {
        let content = "Normal text
```rust
let x = 1;
```
More text";

        assert!(!CodeBlockUtils::is_in_code_block(content, 0));
        assert!(CodeBlockUtils::is_in_code_block(content, 1));
        assert!(CodeBlockUtils::is_in_code_block(content, 2));
        assert!(!CodeBlockUtils::is_in_code_block(content, 3)); // Closing ``` ends the block
        assert!(!CodeBlockUtils::is_in_code_block(content, 4));

        // Test with alternate fence
        let content2 = "Text\n~~~\ncode\n~~~\nEnd";
        assert!(!CodeBlockUtils::is_in_code_block(content2, 0));
        assert!(CodeBlockUtils::is_in_code_block(content2, 1));
        assert!(CodeBlockUtils::is_in_code_block(content2, 2));
        assert!(!CodeBlockUtils::is_in_code_block(content2, 3)); // Closing ~~~ ends the block
        assert!(!CodeBlockUtils::is_in_code_block(content2, 4));

        // Test indented code block
        let content3 = "Normal\n    indented code\nNormal";
        assert!(!CodeBlockUtils::is_in_code_block(content3, 0));
        assert!(CodeBlockUtils::is_in_code_block(content3, 1));
        assert!(!CodeBlockUtils::is_in_code_block(content3, 2));

        // Test out of bounds
        assert!(!CodeBlockUtils::is_in_code_block("test", 10));
    }

    // Fence lines of both kinds are delimiters; short runs and plain text are not.
    #[test]
    fn test_is_code_block_delimiter() {
        assert!(CodeBlockUtils::is_code_block_delimiter("```"));
        assert!(CodeBlockUtils::is_code_block_delimiter("```rust"));
        assert!(CodeBlockUtils::is_code_block_delimiter("  ```"));
        assert!(CodeBlockUtils::is_code_block_delimiter("~~~"));
        assert!(CodeBlockUtils::is_code_block_delimiter("~~~python"));

        assert!(!CodeBlockUtils::is_code_block_delimiter("Normal text"));
        assert!(!CodeBlockUtils::is_code_block_delimiter("``"));
        assert!(!CodeBlockUtils::is_code_block_delimiter("~"));
        assert!(!CodeBlockUtils::is_code_block_delimiter(""));
    }

    // Fence starts with and without an info string, including an indented fence.
    #[test]
    fn test_is_code_block_start() {
        assert!(CodeBlockUtils::is_code_block_start("```"));
        assert!(CodeBlockUtils::is_code_block_start("```rust"));
        assert!(CodeBlockUtils::is_code_block_start("~~~"));
        assert!(CodeBlockUtils::is_code_block_start("~~~python"));
        assert!(CodeBlockUtils::is_code_block_start("  ```"));

        assert!(!CodeBlockUtils::is_code_block_start("Normal text"));
        assert!(!CodeBlockUtils::is_code_block_start(""));
    }

    // Only bare fences (optionally surrounded by whitespace) count as block ends.
    #[test]
    fn test_is_code_block_end() {
        assert!(CodeBlockUtils::is_code_block_end("```"));
        assert!(CodeBlockUtils::is_code_block_end("~~~"));
        assert!(CodeBlockUtils::is_code_block_end("  ```"));
        assert!(CodeBlockUtils::is_code_block_end("```  "));

        // Language specifiers make it a start, not end
        assert!(!CodeBlockUtils::is_code_block_end("```rust"));
        assert!(!CodeBlockUtils::is_code_block_end("~~~python"));
        assert!(!CodeBlockUtils::is_code_block_end("Normal text"));
    }

    // Four columns of leading whitespace are required; each tab counts as four spaces.
    #[test]
    fn test_is_indented_code_block() {
        assert!(CodeBlockUtils::is_indented_code_block("    code"));
        assert!(CodeBlockUtils::is_indented_code_block("        more indented"));
        // Tabs should be treated as 4 spaces each
        assert!(CodeBlockUtils::is_indented_code_block("\tcode")); // 1 tab = 4 spaces
        assert!(CodeBlockUtils::is_indented_code_block("\t\tcode")); // 2 tabs = 8 spaces
        assert!(CodeBlockUtils::is_indented_code_block("  \tcode")); // 2 spaces + 1 tab = 6 spaces

        assert!(!CodeBlockUtils::is_indented_code_block("   code")); // Only 3 spaces
        assert!(!CodeBlockUtils::is_indented_code_block("normal text"));
        assert!(!CodeBlockUtils::is_indented_code_block(""));
    }

    // The info string after the fence is returned; bare fences and non-fence lines give None.
    #[test]
    fn test_get_language_specifier() {
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```rust"),
            Some("rust".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("~~~python"),
            Some("python".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```javascript"),
            Some("javascript".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("  ```rust"),
            Some("rust".to_string())
        );
        assert_eq!(
            CodeBlockUtils::get_language_specifier("```rust ignore"),
            Some("rust ignore".to_string())
        );

        assert_eq!(CodeBlockUtils::get_language_specifier("```"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier("~~~"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier("Normal text"), None);
        assert_eq!(CodeBlockUtils::get_language_specifier(""), None);
    }

    // Per-line flags for fenced blocks (fence lines included), indented code and list items.
    #[test]
    fn test_identify_code_block_lines() {
        let content = "Normal text
```rust
let x = 1;
```
More text";

        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![false, true, true, true, false]);

        // Test with alternate fence
        let content2 = "Text\n~~~\ncode\n~~~\nEnd";
        let result2 = CodeBlockUtils::identify_code_block_lines(content2);
        assert_eq!(result2, vec![false, true, true, true, false]);

        // Test with indented code
        let content3 = "Normal\n    code\n    more code\nNormal";
        let result3 = CodeBlockUtils::identify_code_block_lines(content3);
        assert_eq!(result3, vec![false, true, true, false]);

        // Test with list items (should not be treated as code)
        let content4 = "List:\n    * Item 1\n    * Item 2";
        let result4 = CodeBlockUtils::identify_code_block_lines(content4);
        assert_eq!(result4, vec![false, false, false]);
    }

    // Equality and inequality between CodeBlockState variants.
    #[test]
    fn test_code_block_state_enum() {
        assert_eq!(CodeBlockState::None, CodeBlockState::None);
        assert_eq!(CodeBlockState::Fenced, CodeBlockState::Fenced);
        assert_eq!(CodeBlockState::Indented, CodeBlockState::Indented);
        assert_ne!(CodeBlockState::None, CodeBlockState::Fenced);
    }

    // CodeBlockInfo reports fenced lines, including both fence lines, as code.
    #[test]
    fn test_code_block_info() {
        let content = "Normal\n```\ncode\n```\nText";
        let info = CodeBlockInfo::new(content);

        assert!(!info.is_in_code_block(0));
        assert!(info.is_in_code_block(1));
        assert!(info.is_in_code_block(2));
        assert!(info.is_in_code_block(3));
        assert!(!info.is_in_code_block(4));

        assert!(info.has_code_blocks());

        // Test out of bounds
        assert!(!info.is_in_code_block(100));
    }

    // Code span lookup by line and column on content that has a span but no code block.
    #[test]
    fn test_code_block_info_code_spans() {
        let content = "Text with `inline code` here";
        let info = CodeBlockInfo::new(content);

        assert!(info.has_code_spans());
        assert!(!info.has_code_blocks());

        // Test position inside code span
        assert!(info.is_in_code_span(0, 11)); // Start of `inline
        assert!(info.is_in_code_span(0, 15)); // Inside inline code
        assert!(!info.is_in_code_span(0, 5)); // Before code span
        assert!(!info.is_in_code_span(0, 25)); // After code span
    }

    // A fenced block followed by an indented line yields Fenced and Indented states.
    #[test]
    fn test_compute_code_blocks() {
        let content = "Normal\n```\ncode\n```\n    indented";
        let states = compute_code_blocks(content);

        assert_eq!(states[0], CodeBlockState::None);
        assert_eq!(states[1], CodeBlockState::Fenced);
        assert_eq!(states[2], CodeBlockState::Fenced);
        assert_eq!(states[3], CodeBlockState::Fenced);
        assert_eq!(states[4], CodeBlockState::Indented);
    }

    // Span ranges cover the delimiters; escaped backticks do not form spans.
    #[test]
    fn test_compute_code_spans() {
        let content = "Text `code` and ``double`` backticks";
        let spans = compute_code_spans(content);

        assert_eq!(spans.len(), 2);
        // First span: `code`
        assert_eq!(&content[spans[0].0..spans[0].1], "`code`");
        // Second span: ``double``
        assert_eq!(&content[spans[1].0..spans[1].1], "``double``");

        // Test escaped backticks
        let content2 = r"Text \`not code\` but `real code`";
        let spans2 = compute_code_spans(content2);
        assert_eq!(spans2.len(), 1);
        assert!(content2[spans2[0].0..spans2[0].1].contains("real code"));
    }

    // Display output and the default variant of CodeBlockStyle.
    #[test]
    fn test_code_block_style() {
        assert_eq!(CodeBlockStyle::Fenced.to_string(), "fenced");
        assert_eq!(CodeBlockStyle::Indented.to_string(), "indented");
        assert_eq!(CodeBlockStyle::Consistent.to_string(), "consistent");

        assert_eq!(CodeBlockStyle::default(), CodeBlockStyle::Consistent);
    }

    // Back-to-back fences: each bare fence toggles the block state.
    #[test]
    fn test_nested_code_blocks() {
        // Nested code blocks don't exist in markdown, but test edge cases
        let content = "```\n```\ncode\n```\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        // First ``` starts a block, second ``` ends it, third starts new block
        assert_eq!(result, vec![true, true, false, true, true]);
    }

    // Non-ASCII text inside a block and as an info string.
    #[test]
    fn test_unicode_content() {
        let content = "```rust\nlet 你好 = \"世界\";\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, true]);

        assert_eq!(CodeBlockUtils::get_language_specifier("```🦀"), Some("🦀".to_string()));
    }

    // Empty content, content that is only a fence, and mixed fence characters.
    #[test]
    fn test_edge_cases() {
        // Empty content
        assert_eq!(CodeBlockUtils::identify_code_block_lines(""), Vec::<bool>::new());
        assert!(!CodeBlockUtils::is_in_code_block("", 0));

        // Just delimiters
        assert_eq!(CodeBlockUtils::identify_code_block_lines("```"), vec![true]);
        assert_eq!(CodeBlockUtils::identify_code_block_lines("~~~"), vec![true]);

        // Mixed fence types (should not close each other)
        let content = "```\ncode\n~~~\nmore\n```";
        let result = CodeBlockUtils::identify_code_block_lines(content);
        assert_eq!(result, vec![true, true, true, true, true]);
    }
}
699}