rumdl_lib/rules/
code_block_utils.rs

1use crate::utils::range_utils::LineIndex;
2use lazy_static::lazy_static;
3use regex::Regex;
4use std::fmt;
5
6lazy_static! {
7    // Standard code block detection patterns
8    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap();
9    static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)```\s*$").unwrap();
10    static ref ALTERNATE_FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap();
11    static ref ALTERNATE_FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)~~~\s*$").unwrap();
12    static ref INDENTED_CODE_BLOCK: Regex = Regex::new(r"^(\s{4,})").unwrap();
13    static ref LIST_ITEM_RE: Regex = Regex::new(r"^(\s*)([*+-]|\d+[.)])(\s*)(.*)$").unwrap();
14}
15
/// Utility functions for detecting and handling code blocks in Markdown documents.
///
/// The type carries no data: every helper is an associated function that works purely on the
/// text passed to it, so `CodeBlockUtils` never needs to be instantiated.
pub struct CodeBlockUtils;
18
19impl CodeBlockUtils {
20    /// Check if a line is inside a code block
21    pub fn is_in_code_block(content: &str, line_num: usize) -> bool {
22        let lines: Vec<&str> = content.lines().collect();
23        if line_num >= lines.len() {
24            return false;
25        }
26
27        let mut in_fenced_code = false;
28        let mut in_alternate_fenced = false;
29
30        for (i, line) in lines.iter().enumerate() {
31            if i > line_num {
32                break;
33            }
34
35            if FENCED_CODE_BLOCK_START.is_match(line) {
36                in_fenced_code = !in_fenced_code;
37            } else if FENCED_CODE_BLOCK_END.is_match(line) && in_fenced_code {
38                in_fenced_code = false;
39            } else if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
40                in_alternate_fenced = !in_alternate_fenced;
41            } else if ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) && in_alternate_fenced {
42                in_alternate_fenced = false;
43            }
44        }
45
46        // Check if the current line is indented as code block
47        if line_num < lines.len() && Self::is_indented_code_block(lines[line_num]) {
48            return true;
49        }
50
51        // Return true if we're in any type of code block
52        in_fenced_code || in_alternate_fenced
53    }
54
55    /// Check if a line is a code block delimiter (start or end)
56    pub fn is_code_block_delimiter(line: &str) -> bool {
57        FENCED_CODE_BLOCK_START.is_match(line)
58            || FENCED_CODE_BLOCK_END.is_match(line)
59            || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
60            || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
61    }
62
63    /// Check if a line is the start of a code block
64    pub fn is_code_block_start(line: &str) -> bool {
65        FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line)
66    }
67
68    /// Check if a line is the end of a code block
69    pub fn is_code_block_end(line: &str) -> bool {
70        FENCED_CODE_BLOCK_END.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line)
71    }
72
73    /// Check if a line is an indented code block
74    pub fn is_indented_code_block(line: &str) -> bool {
75        // Convert tabs to spaces (1 tab = 4 spaces) for proper indentation checking
76        let expanded_line = line.replace('\t', "    ");
77        INDENTED_CODE_BLOCK.is_match(&expanded_line)
78    }
79
80    /// Extracts the language specifier from a fenced code block start line
81    ///
82    /// This function parses the line that starts a fenced code block (using either ``` or ~~~)
83    /// and extracts the language specifier that follows the fence markers.
84    ///
85    /// # Parameters
86    /// * `line` - The line of text that potentially contains a code block start with language specifier
87    ///
88    /// # Returns
89    /// * `Some(String)` - The language specifier if found
90    /// * `None` - If the line is not a code block start or has no language specifier
91    ///
92    /// # Examples
93    /// ```
94    /// use rumdl_lib::rules::code_block_utils::CodeBlockUtils;
95    ///
96    /// let specifier = CodeBlockUtils::get_language_specifier("```rust");
97    /// assert_eq!(specifier, Some("rust".to_string()));
98    ///
99    /// let specifier = CodeBlockUtils::get_language_specifier("~~~python");
100    /// assert_eq!(specifier, Some("python".to_string()));
101    ///
102    /// let specifier = CodeBlockUtils::get_language_specifier("```");
103    /// assert_eq!(specifier, None);
104    /// ```
105    pub fn get_language_specifier(line: &str) -> Option<String> {
106        if FENCED_CODE_BLOCK_START.is_match(line) || ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
107            let trimmed = line.trim_start();
108            let after_fence = &trimmed[3..].trim_start();
109            if !after_fence.is_empty() {
110                return Some(after_fence.to_string());
111            }
112        }
113        None
114    }
115
116    /// Identify which lines in the content are in code blocks
117    ///
118    /// This function analyzes Markdown content and determines which lines are part of code blocks,
119    /// including both fenced code blocks (``` or ~~~) and indented code blocks.
120    ///
121    /// # Algorithm
122    /// - Iterates through each line of content
123    /// - Tracks state for fenced code blocks (toggled by fence delimiters)
124    /// - Detects indented code blocks (4 spaces or 1 tab)
125    /// - Handles nested code blocks appropriately
126    ///
127    /// # Parameters
128    /// * `content` - The full Markdown content to analyze
129    ///
130    /// # Returns
131    /// A vector of boolean values with the same length as the number of lines in the input content.
132    /// Each element indicates whether the corresponding line is inside a code block:
133    /// * `true` - The line is inside a code block
134    /// * `false` - The line is not inside a code block
135    ///
136    /// # Examples
137    /// ```
138    /// use rumdl_lib::rules::code_block_utils::CodeBlockUtils;
139    ///
140    /// let content = "Some text\n```rust\nlet x = 1;\n```\nMore text";
141    /// let in_code_block = CodeBlockUtils::identify_code_block_lines(content);
142    /// assert_eq!(in_code_block, vec![false, true, true, true, false]);
143    /// ```
144    pub fn identify_code_block_lines(content: &str) -> Vec<bool> {
145        let lines: Vec<&str> = content.lines().collect();
146        let mut in_code_block = vec![false; lines.len()];
147
148        let mut in_fenced_code = false;
149        let mut in_alternate_fenced = false;
150
151        for (i, line) in lines.iter().enumerate() {
152            // Quick check for code fence markers with literal prefixes
153            let trimmed = line.trim_start();
154
155            if trimmed.starts_with("```") {
156                if FENCED_CODE_BLOCK_START.is_match(line) {
157                    in_fenced_code = !in_fenced_code;
158                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
159                } else if in_fenced_code && FENCED_CODE_BLOCK_END.is_match(line) {
160                    in_fenced_code = false;
161                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
162                }
163            } else if trimmed.starts_with("~~~") {
164                if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
165                    in_alternate_fenced = !in_alternate_fenced;
166                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
167                } else if in_alternate_fenced && ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) {
168                    in_alternate_fenced = false;
169                    in_code_block[i] = true; // Mark the delimiter line as part of the code block
170                }
171            }
172
173            // If we're in a code fence, mark the line
174            if in_fenced_code || in_alternate_fenced {
175                in_code_block[i] = true;
176            } else if !in_code_block[i] {
177                // Check for indented code blocks only if not already marked
178                // Do not mark as code block if the line is a list item
179                if (line.starts_with("    ") || INDENTED_CODE_BLOCK.is_match(line)) && !LIST_ITEM_RE.is_match(line) {
180                    in_code_block[i] = true;
181                }
182            }
183        }
184
185        in_code_block
186    }
187}
188
189// Cached regex patterns for better performance
190lazy_static! {
191    static ref FENCED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(?:```|~~~)").unwrap();
192    static ref INDENTED_CODE_BLOCK_PATTERN: Regex = Regex::new(r"^(\s{4,})").unwrap();
193    static ref BACKTICK_PATTERN: Regex = Regex::new(r"(`+)").unwrap();
194}
195
/// Classification of a single line with respect to code blocks
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CodeBlockState {
    /// The line is not part of any code block
    None,
    /// The line belongs to a fenced code block (fence lines included)
    Fenced,
    /// The line belongs to an indented code block
    Indented,
}
203
204/// Structure to hold pre-computed code block information
205#[derive(Debug)]
206pub struct CodeBlockInfo {
207    /// Whether each line is in a code block, and which type
208    pub block_states: Vec<CodeBlockState>,
209    /// Positions of code spans in the text (start, end)
210    pub code_spans: Vec<(usize, usize)>,
211    /// The original content used to create this info
212    content: String,
213    /// LineIndex for correct byte position calculations across all line ending types
214    line_index: LineIndex,
215}
216
217impl CodeBlockInfo {
218    /// Create a new CodeBlockInfo by analyzing the content
219    pub fn new(content: &str) -> Self {
220        let block_states = compute_code_blocks(content);
221        let code_spans = compute_code_spans(content);
222        let line_index = LineIndex::new(content.to_string());
223
224        CodeBlockInfo {
225            block_states,
226            code_spans,
227            content: content.to_string(),
228            line_index,
229        }
230    }
231
232    /// Check if a line is inside a code block
233    pub fn is_in_code_block(&self, line_index: usize) -> bool {
234        if line_index < self.block_states.len() {
235            self.block_states[line_index] != CodeBlockState::None
236        } else {
237            false
238        }
239    }
240
241    /// Check if a position is inside a code span
242    pub fn is_in_code_span(&self, line_index: usize, column_index: usize) -> bool {
243        // Calculate absolute position using LineIndex for correct handling of all line ending types
244        let line_start = self
245            .line_index
246            .get_line_start_byte(line_index + 1)
247            .unwrap_or(self.content.len());
248        let position = line_start + column_index;
249
250        // Check if position is in any code span
251        for &(start, end) in &self.code_spans {
252            if position >= start && position <= end {
253                return true;
254            }
255        }
256
257        false
258    }
259
260    /// Quick check if content contains any code blocks
261    pub fn has_code_blocks(&self) -> bool {
262        self.block_states.iter().any(|state| *state != CodeBlockState::None)
263    }
264
265    /// Quick check if content contains any code spans
266    pub fn has_code_spans(&self) -> bool {
267        !self.code_spans.is_empty()
268    }
269}
270
271/// Compute which lines are in code blocks and what type
272pub fn compute_code_blocks(content: &str) -> Vec<CodeBlockState> {
273    let mut in_fenced_block = false;
274    let mut result = Vec::new();
275    let mut fence_marker = "";
276
277    for line in content.lines() {
278        if in_fenced_block {
279            if line.trim().starts_with(fence_marker) {
280                in_fenced_block = false;
281                result.push(CodeBlockState::Fenced); // The closing fence is still part of the block
282            } else {
283                result.push(CodeBlockState::Fenced);
284            }
285        } else if FENCED_CODE_BLOCK_PATTERN.is_match(line) {
286            in_fenced_block = true;
287            fence_marker = if line.trim().starts_with("```") { "```" } else { "~~~" };
288            result.push(CodeBlockState::Fenced); // The opening fence is part of the block
289        } else if !line.trim().is_empty() {
290            // Convert tabs to spaces for proper indentation checking
291            let expanded_line = line.replace('\t', "    ");
292            if INDENTED_CODE_BLOCK_PATTERN.is_match(&expanded_line) {
293                result.push(CodeBlockState::Indented);
294            } else {
295                result.push(CodeBlockState::None);
296            }
297        } else {
298            result.push(CodeBlockState::None);
299        }
300    }
301
302    result
303}
304
/// Compute positions of code spans in the text
///
/// Each returned pair is a byte range into `content`: `start` is the offset of the first
/// opening backtick and `end` is the offset just past the last closing backtick, so
/// `&content[start..end]` is the whole span including its delimiters.
///
/// Rules applied while scanning:
/// * A backtick directly preceded by a backslash is treated as escaped and never starts or
///   ends a run.
/// * A run of backticks opens a span that is closed by the next run of exactly the same
///   length; an opener without a matching closer yields no span.
/// * A run of three or more backticks preceded on its line only by whitespace is taken for a
///   code fence and is not used as a span opener.
pub fn compute_code_spans(content: &str) -> Vec<(usize, usize)> {
    let chars: Vec<char> = content.chars().collect();

    // byte_offsets[k] is the byte offset of chars[k]; the extra last entry is the total length,
    // so converting a character index to a byte offset is a plain lookup.
    let mut byte_offsets: Vec<usize> = content.char_indices().map(|(offset, _)| offset).collect();
    byte_offsets.push(content.len());

    let is_escaped = |idx: usize| idx > 0 && chars[idx - 1] == '\\';
    // Index just past the run of backticks that starts at `from`.
    let run_end = |from: usize| from + chars[from..].iter().take_while(|&&c| c == '`').count();

    let mut spans = Vec::new();
    let mut i = 0;

    while i < chars.len() {
        if chars[i] != '`' || is_escaped(i) {
            i += 1;
            continue;
        }

        let open_start = i;
        let open_end = run_end(open_start);
        // Whatever happens below, scanning resumes no earlier than the end of this run.
        i = open_end;

        // Fence lines must not be confused with inline code spans.
        if is_likely_code_block_delimiter(&chars, open_start) {
            continue;
        }

        let open_len = open_end - open_start;
        let mut j = open_end;
        while j < chars.len() {
            if chars[j] != '`' || is_escaped(j) {
                j += 1;
                continue;
            }

            let close_end = run_end(j);
            if close_end - j == open_len {
                spans.push((byte_offsets[open_start], byte_offsets[close_end]));
                i = close_end; // Resume the outer search after this span
                break;
            }
            // Run of a different length: keep looking after it.
            j = close_end;
        }
    }

    spans
}

// Helper function to determine if a backtick sequence is likely a code block delimiter:
// a run of at least three backticks that is preceded on its line only by whitespace.
fn is_likely_code_block_delimiter(chars: &[char], start_idx: usize) -> bool {
    let run_len = chars
        .iter()
        .skip(start_idx)
        .take_while(|&&c| c == '`')
        .count();

    if run_len < 3 {
        // Not enough backticks for a code block (also covers an out-of-range start index)
        return false;
    }

    // Inspect everything between the previous newline (or the start of the text) and the run,
    // including the very first character of the text.
    chars[..start_idx]
        .iter()
        .rev()
        .take_while(|&&c| c != '\n')
        .all(|c| c.is_whitespace())
}
423
/// Code block style that MD046 can be configured with
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum CodeBlockStyle {
    /// Consistent with the first code block style found (the default)
    #[default]
    Consistent,
    /// Indented code blocks (4 spaces)
    Indented,
    /// Fenced code blocks (``` or ~~~)
    Fenced,
}

impl fmt::Display for CodeBlockStyle {
    /// Writes the lowercase name of the style.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            CodeBlockStyle::Consistent => "consistent",
            CodeBlockStyle::Indented => "indented",
            CodeBlockStyle::Fenced => "fenced",
        };
        f.write_str(label)
    }
}
445
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `predicate` yields `expected` for every entry of `lines`.
    fn assert_all(predicate: fn(&str) -> bool, lines: &[&str], expected: bool) {
        for line in lines {
            assert_eq!(predicate(line), expected, "unexpected result for {:?}", line);
        }
    }

    /// Asserts `CodeBlockUtils::is_in_code_block` for the leading lines of `content`.
    fn assert_line_flags(content: &str, expected: &[bool]) {
        for (line_num, &flag) in expected.iter().enumerate() {
            assert_eq!(
                CodeBlockUtils::is_in_code_block(content, line_num),
                flag,
                "line {} of {:?}",
                line_num,
                content
            );
        }
    }

    #[test]
    fn test_is_in_code_block() {
        // Backtick fence: the closing ``` already ends the block
        assert_line_flags(
            "Normal text\n```rust\nlet x = 1;\n```\nMore text",
            &[false, true, true, false, false],
        );

        // Alternate fence: the closing ~~~ already ends the block
        assert_line_flags("Text\n~~~\ncode\n~~~\nEnd", &[false, true, true, false, false]);

        // Indented code block
        assert_line_flags("Normal\n    indented code\nNormal", &[false, true, false]);

        // Out of bounds
        assert!(!CodeBlockUtils::is_in_code_block("test", 10));
    }

    #[test]
    fn test_is_code_block_delimiter() {
        assert_all(
            CodeBlockUtils::is_code_block_delimiter,
            &["```", "```rust", "  ```", "~~~", "~~~python"],
            true,
        );
        assert_all(
            CodeBlockUtils::is_code_block_delimiter,
            &["Normal text", "``", "~", ""],
            false,
        );
    }

    #[test]
    fn test_is_code_block_start() {
        assert_all(
            CodeBlockUtils::is_code_block_start,
            &["```", "```rust", "~~~", "~~~python", "  ```"],
            true,
        );
        assert_all(CodeBlockUtils::is_code_block_start, &["Normal text", ""], false);
    }

    #[test]
    fn test_is_code_block_end() {
        assert_all(
            CodeBlockUtils::is_code_block_end,
            &["```", "~~~", "  ```", "```  "],
            true,
        );

        // An info string makes the line a start, not an end
        assert_all(
            CodeBlockUtils::is_code_block_end,
            &["```rust", "~~~python", "Normal text"],
            false,
        );
    }

    #[test]
    fn test_is_indented_code_block() {
        // Tabs are treated as 4 spaces each:
        // "\tcode" = 4, "\t\tcode" = 8, "  \tcode" = 2 spaces + 1 tab = 6
        assert_all(
            CodeBlockUtils::is_indented_code_block,
            &["    code", "        more indented", "\tcode", "\t\tcode", "  \tcode"],
            true,
        );

        // "   code" has only 3 spaces
        assert_all(
            CodeBlockUtils::is_indented_code_block,
            &["   code", "normal text", ""],
            false,
        );
    }

    #[test]
    fn test_get_language_specifier() {
        let cases: [(&str, Option<&str>); 9] = [
            ("```rust", Some("rust")),
            ("~~~python", Some("python")),
            ("```javascript", Some("javascript")),
            ("  ```rust", Some("rust")),
            ("```rust ignore", Some("rust ignore")),
            ("```", None),
            ("~~~", None),
            ("Normal text", None),
            ("", None),
        ];

        for (line, expected) in cases.iter() {
            assert_eq!(
                CodeBlockUtils::get_language_specifier(line),
                expected.map(|lang| lang.to_string()),
                "unexpected specifier for {:?}",
                line
            );
        }
    }

    #[test]
    fn test_identify_code_block_lines() {
        // Backtick fence
        assert_eq!(
            CodeBlockUtils::identify_code_block_lines("Normal text\n```rust\nlet x = 1;\n```\nMore text"),
            vec![false, true, true, true, false]
        );

        // Alternate fence
        assert_eq!(
            CodeBlockUtils::identify_code_block_lines("Text\n~~~\ncode\n~~~\nEnd"),
            vec![false, true, true, true, false]
        );

        // Indented code
        assert_eq!(
            CodeBlockUtils::identify_code_block_lines("Normal\n    code\n    more code\nNormal"),
            vec![false, true, true, false]
        );

        // List items must not be treated as code
        assert_eq!(
            CodeBlockUtils::identify_code_block_lines("List:\n    * Item 1\n    * Item 2"),
            vec![false, false, false]
        );
    }

    #[test]
    fn test_code_block_state_enum() {
        let all = [CodeBlockState::None, CodeBlockState::Fenced, CodeBlockState::Indented];
        for state in all.iter() {
            assert_eq!(*state, *state);
        }
        assert_ne!(CodeBlockState::None, CodeBlockState::Fenced);
    }

    #[test]
    fn test_code_block_info() {
        let info = CodeBlockInfo::new("Normal\n```\ncode\n```\nText");

        let expected = [false, true, true, true, false];
        for (line, &flag) in expected.iter().enumerate() {
            assert_eq!(info.is_in_code_block(line), flag, "line {}", line);
        }

        assert!(info.has_code_blocks());

        // Out of bounds
        assert!(!info.is_in_code_block(100));
    }

    #[test]
    fn test_code_block_info_code_spans() {
        let info = CodeBlockInfo::new("Text with `inline code` here");

        assert!(info.has_code_spans());
        assert!(!info.has_code_blocks());

        // (column, expected): start of `inline, inside the span, before it, after it
        let probes = [(11, true), (15, true), (5, false), (25, false)];
        for &(column, inside) in probes.iter() {
            assert_eq!(info.is_in_code_span(0, column), inside, "column {}", column);
        }
    }

    #[test]
    fn test_compute_code_blocks() {
        let states = compute_code_blocks("Normal\n```\ncode\n```\n    indented");

        let expected = [
            CodeBlockState::None,
            CodeBlockState::Fenced,
            CodeBlockState::Fenced,
            CodeBlockState::Fenced,
            CodeBlockState::Indented,
        ];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(states[idx], *want, "line {}", idx);
        }
    }

    #[test]
    fn test_compute_code_spans() {
        let content = "Text `code` and ``double`` backticks";
        let spans = compute_code_spans(content);

        assert_eq!(spans.len(), 2);
        let (first_start, first_end) = spans[0];
        let (second_start, second_end) = spans[1];
        assert_eq!(&content[first_start..first_end], "`code`");
        assert_eq!(&content[second_start..second_end], "``double``");

        // Escaped backticks do not delimit spans
        let escaped = r"Text \`not code\` but `real code`";
        let escaped_spans = compute_code_spans(escaped);
        assert_eq!(escaped_spans.len(), 1);
        let (start, end) = escaped_spans[0];
        assert!(escaped[start..end].contains("real code"));
    }

    #[test]
    fn test_code_block_style() {
        let labels = [
            (CodeBlockStyle::Fenced, "fenced"),
            (CodeBlockStyle::Indented, "indented"),
            (CodeBlockStyle::Consistent, "consistent"),
        ];
        for (style, label) in labels.iter() {
            assert_eq!(style.to_string(), *label);
        }

        assert_eq!(CodeBlockStyle::default(), CodeBlockStyle::Consistent);
    }

    #[test]
    fn test_nested_code_blocks() {
        // Markdown has no nested code blocks; this pins the behaviour for back-to-back fences:
        // the first ``` opens a block, the second closes it, the third opens a new one.
        let flags = CodeBlockUtils::identify_code_block_lines("```\n```\ncode\n```\n```");
        assert_eq!(flags, vec![true, true, false, true, true]);
    }

    #[test]
    fn test_unicode_content() {
        let flags = CodeBlockUtils::identify_code_block_lines("```rust\nlet 你好 = \"世界\";\n```");
        assert_eq!(flags, vec![true, true, true]);

        assert_eq!(CodeBlockUtils::get_language_specifier("```🦀"), Some("🦀".to_string()));
    }

    #[test]
    fn test_edge_cases() {
        // Empty content
        assert_eq!(CodeBlockUtils::identify_code_block_lines(""), Vec::<bool>::new());
        assert!(!CodeBlockUtils::is_in_code_block("", 0));

        // A lone delimiter
        for fence in ["```", "~~~"].iter() {
            assert_eq!(CodeBlockUtils::identify_code_block_lines(fence), vec![true]);
        }

        // Mixed fence types must not close each other
        let flags = CodeBlockUtils::identify_code_block_lines("```\ncode\n~~~\nmore\n```");
        assert_eq!(flags, vec![true, true, true, true, true]);
    }
}