Skip to main content

rumdl_lib/rules/
md070_nested_code_fence.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2
/// Rule MD070: Nested code fence collision detection
///
/// Detects when a fenced code block contains fence markers that would cause
/// premature closure. Suggests using longer fences to avoid this issue.
/// Both backtick and tilde fences are handled; a collision is only reported
/// between fences that use the same character.
///
/// Checks languages where triple backtick sequences commonly appear:
/// markdown, Python, JavaScript, shell, Rust, Go, and others with multiline
/// strings, heredocs, template literals, or doc comments.
///
/// The rule is a unit struct: it holds no state or configuration.
///
/// See [docs/md070.md](../../docs/md070.md) for full documentation.
#[derive(Clone, Default)]
pub struct MD070NestedCodeFence;
15
16impl MD070NestedCodeFence {
    /// Create a new instance of the rule. The rule has no fields, so this is
    /// equivalent to the derived `Default`.
    pub fn new() -> Self {
        Self
    }
20
21    /// Check if the given language should be checked for nested fences.
22    /// Covers languages where triple backtick sequences commonly appear in source:
23    /// multiline strings with embedded markdown, heredocs, doc comments, template
24    /// literals, and data formats with multiline string values.
25    fn should_check_language(lang: &str) -> bool {
26        let base = lang.split_whitespace().next().unwrap_or("");
27        matches!(
28            base.to_ascii_lowercase().as_str(),
29            // Documentation / markup
30            ""
31                | "markdown"
32                | "md"
33                | "mdx"
34                | "text"
35                | "txt"
36                | "plain"
37                // Multiline strings / docstrings
38                | "python"
39                | "py"
40                | "ruby"
41                | "rb"
42                | "perl"
43                | "pl"
44                | "php"
45                | "lua"
46                | "r"
47                | "rmd"
48                | "rmarkdown"
49                // Template literals / raw strings
50                | "javascript"
51                | "js"
52                | "jsx"
53                | "mjs"
54                | "cjs"
55                | "typescript"
56                | "ts"
57                | "tsx"
58                | "mts"
59                | "rust"
60                | "rs"
61                | "go"
62                | "golang"
63                | "swift"
64                | "kotlin"
65                | "kt"
66                | "kts"
67                | "java"
68                | "csharp"
69                | "cs"
70                | "c#"
71                | "scala"
72                // Shell heredocs
73                | "shell"
74                | "sh"
75                | "bash"
76                | "zsh"
77                | "fish"
78                | "powershell"
79                | "ps1"
80                | "pwsh"
81                // Data / config formats
82                | "yaml"
83                | "yml"
84                | "toml"
85                | "json"
86                | "jsonc"
87                | "json5"
88                // Template engines
89                | "jinja"
90                | "jinja2"
91                | "handlebars"
92                | "hbs"
93                | "liquid"
94                | "nunjucks"
95                | "njk"
96                | "ejs"
97                // Terminal output
98                | "console"
99                | "terminal"
100        )
101    }
102
103    /// Find the maximum fence length of same-character fences in the content
104    /// Returns (line_offset, fence_length) of the first collision, if any
105    fn find_fence_collision(content: &str, fence_char: char, outer_fence_length: usize) -> Option<(usize, usize)> {
106        for (line_idx, line) in content.lines().enumerate() {
107            let trimmed = line.trim_start();
108
109            // Check if line starts with the same fence character
110            if trimmed.starts_with(fence_char) {
111                let count = trimmed.chars().take_while(|&c| c == fence_char).count();
112
113                // Collision if same char AND at least as long as outer fence
114                if count >= outer_fence_length {
115                    // Verify it looks like a fence line (only fence chars + optional language/whitespace)
116                    let after_fence = &trimmed[count..];
117                    // A fence line is: fence chars + optional language identifier + optional whitespace
118                    // We detect collision if:
119                    // - Line ends after fence chars (closing fence)
120                    // - Line has alphanumeric after fence (opening fence with language)
121                    // - Line has only whitespace after fence
122                    if after_fence.is_empty()
123                        || after_fence.trim().is_empty()
124                        || after_fence
125                            .chars()
126                            .next()
127                            .is_some_and(|c| c.is_alphabetic() || c == '{')
128                    {
129                        return Some((line_idx, count));
130                    }
131                }
132            }
133        }
134        None
135    }
136
137    /// Find the maximum fence length needed to safely contain the content
138    fn find_safe_fence_length(content: &str, fence_char: char) -> usize {
139        let mut max_fence = 0;
140
141        for line in content.lines() {
142            let trimmed = line.trim_start();
143            if trimmed.starts_with(fence_char) {
144                let count = trimmed.chars().take_while(|&c| c == fence_char).count();
145                if count >= 3 {
146                    // Only count valid fence-like patterns
147                    let after_fence = &trimmed[count..];
148                    if after_fence.is_empty()
149                        || after_fence.trim().is_empty()
150                        || after_fence
151                            .chars()
152                            .next()
153                            .is_some_and(|c| c.is_alphabetic() || c == '{')
154                    {
155                        max_fence = max_fence.max(count);
156                    }
157                }
158            }
159        }
160
161        max_fence
162    }
163
164    /// Find the user's intended closing fence when a collision is detected.
165    /// Searches past the first (premature) closing fence for the last bare
166    /// fence of the same type before hitting a new opening fence.
167    fn find_intended_close(
168        lines: &[&str],
169        first_close: usize,
170        fence_char: char,
171        fence_length: usize,
172        opening_indent: usize,
173    ) -> usize {
174        let mut intended_close = first_close;
175        for (j, line_j) in lines.iter().enumerate().skip(first_close + 1) {
176            if Self::is_closing_fence(line_j, fence_char, fence_length) {
177                intended_close = j;
178            } else if Self::parse_fence_line(line_j)
179                .is_some_and(|(ind, ch, _, info)| ind <= opening_indent && ch == fence_char && !info.is_empty())
180            {
181                break;
182            }
183        }
184        intended_close
185    }
186
187    /// Parse a fence marker from a line, returning (indent, fence_char, fence_length, info_string)
188    fn parse_fence_line(line: &str) -> Option<(usize, char, usize, &str)> {
189        let indent = line.len() - line.trim_start().len();
190        // Per CommonMark, fence must have 0-3 spaces of indentation
191        if indent > 3 {
192            return None;
193        }
194
195        let trimmed = line.trim_start();
196
197        if trimmed.starts_with("```") {
198            let count = trimmed.chars().take_while(|&c| c == '`').count();
199            if count >= 3 {
200                let info = trimmed[count..].trim();
201                return Some((indent, '`', count, info));
202            }
203        } else if trimmed.starts_with("~~~") {
204            let count = trimmed.chars().take_while(|&c| c == '~').count();
205            if count >= 3 {
206                let info = trimmed[count..].trim();
207                return Some((indent, '~', count, info));
208            }
209        }
210
211        None
212    }
213
214    /// Check if a line is a valid closing fence for the given opening fence
215    /// Per CommonMark, closing fences can have 0-3 spaces of indentation regardless of opening fence
216    fn is_closing_fence(line: &str, fence_char: char, min_length: usize) -> bool {
217        let indent = line.len() - line.trim_start().len();
218        // Per CommonMark spec, closing fence can have 0-3 spaces of indentation
219        if indent > 3 {
220            return false;
221        }
222
223        let trimmed = line.trim_start();
224        if !trimmed.starts_with(fence_char) {
225            return false;
226        }
227
228        let count = trimmed.chars().take_while(|&c| c == fence_char).count();
229        if count < min_length {
230            return false;
231        }
232
233        // Closing fence must have only whitespace after fence chars
234        trimmed[count..].trim().is_empty()
235    }
236}
237
238impl Rule for MD070NestedCodeFence {
    /// Identifier of this rule; also used as the `rule_name` on the warnings
    /// it emits and when checking whether the rule is disabled for a line.
    fn name(&self) -> &'static str {
        "MD070"
    }
242
    /// One-line, human-readable summary of what the rule reports.
    fn description(&self) -> &'static str {
        "Nested code fence collision - use longer fence to avoid premature closure"
    }
246
247    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
248        let mut warnings = Vec::new();
249        let lines = ctx.raw_lines();
250
251        let mut i = 0;
252        while i < lines.len() {
253            // Skip lines in contexts that shouldn't be processed
254            if let Some(line_info) = ctx.lines.get(i)
255                && (line_info.in_front_matter || line_info.in_html_comment || line_info.in_html_block)
256            {
257                i += 1;
258                continue;
259            }
260
261            // Skip if we're already inside a code block (check previous line).
262            // This handles list-indented code blocks (4+ spaces) which our rule doesn't
263            // parse directly, but the context detects correctly. If the previous line
264            // is in a code block, this line is either content or a closing fence for
265            // that block - not a new opening fence.
266            if i > 0
267                && let Some(prev_line_info) = ctx.lines.get(i - 1)
268                && prev_line_info.in_code_block
269            {
270                i += 1;
271                continue;
272            }
273
274            let line = lines[i];
275
276            // Try to parse as opening fence
277            if let Some((_indent, fence_char, fence_length, info_string)) = Self::parse_fence_line(line) {
278                let block_start = i;
279
280                // Extract the language (first word of info string)
281                let language = info_string.split_whitespace().next().unwrap_or("");
282
283                // Find the closing fence
284                let mut block_end = None;
285                for (j, line_j) in lines.iter().enumerate().skip(i + 1) {
286                    if Self::is_closing_fence(line_j, fence_char, fence_length) {
287                        block_end = Some(j);
288                        break;
289                    }
290                }
291
292                if let Some(end_line) = block_end {
293                    // We have a complete code block from block_start to end_line
294                    // Check if we should analyze this block
295                    if Self::should_check_language(language) {
296                        // Get the content between fences
297                        let block_content: String = if block_start + 1 < end_line {
298                            lines[(block_start + 1)..end_line].join("\n")
299                        } else {
300                            String::new()
301                        };
302
303                        // Check for fence collision
304                        if let Some((collision_line_offset, _collision_length)) =
305                            Self::find_fence_collision(&block_content, fence_char, fence_length)
306                        {
307                            let collision_line_num = block_start + 1 + collision_line_offset + 1; // 1-indexed
308
309                            // Find the user's intended closing fence (may be past the
310                            // CommonMark-visible close when inner ``` causes premature closure)
311                            let indent = line.len() - line.trim_start().len();
312                            let intended_close =
313                                Self::find_intended_close(lines, end_line, fence_char, fence_length, indent);
314
315                            // Compute safe fence length from the full intended content
316                            let full_content: String = if block_start + 1 < intended_close {
317                                lines[(block_start + 1)..intended_close].join("\n")
318                            } else {
319                                block_content.clone()
320                            };
321                            let safe_length = Self::find_safe_fence_length(&full_content, fence_char) + 1;
322                            let suggested_fence: String = std::iter::repeat_n(fence_char, safe_length).collect();
323
324                            // Build a Fix that replaces the block from opening fence
325                            // through the intended closing fence. This must be safe for
326                            // direct application by the LSP code action path.
327                            let open_byte_start = ctx.line_index.get_line_start_byte(block_start + 1).unwrap_or(0);
328                            let close_byte_end = ctx
329                                .line_index
330                                .get_line_start_byte(intended_close + 2)
331                                .unwrap_or(ctx.content.len());
332
333                            let indent_str = &line[..indent];
334                            let closing_line = lines[intended_close];
335                            let closing_indent = &closing_line[..closing_line.len() - closing_line.trim_start().len()];
336                            let mut replacement = format!("{indent_str}{suggested_fence}");
337                            if !info_string.is_empty() {
338                                replacement.push_str(info_string);
339                            }
340                            replacement.push('\n');
341                            for content_line in &lines[(block_start + 1)..intended_close] {
342                                replacement.push_str(content_line);
343                                replacement.push('\n');
344                            }
345                            replacement.push_str(closing_indent);
346                            replacement.push_str(&suggested_fence);
347                            replacement.push('\n');
348
349                            warnings.push(LintWarning {
350                                rule_name: Some(self.name().to_string()),
351                                message: format!(
352                                    "Code block contains fence markers at line {collision_line_num} that interfere with block parsing — use {suggested_fence} for outer fence"
353                                ),
354                                line: block_start + 1,
355                                column: 1,
356                                end_line: intended_close + 1,
357                                end_column: lines[intended_close].len() + 1,
358                                severity: Severity::Warning,
359                                fix: Some(Fix {
360                                    range: (open_byte_start..close_byte_end),
361                                    replacement,
362                                }),
363                            });
364                        }
365                    }
366
367                    // Move past this code block
368                    i = end_line + 1;
369                    continue;
370                }
371            }
372
373            i += 1;
374        }
375
376        Ok(warnings)
377    }
378
379    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
380        let content = ctx.content;
381        let mut result = String::new();
382        let lines = ctx.raw_lines();
383
384        let mut i = 0;
385        while i < lines.len() {
386            // Skip lines where the rule is disabled via inline config
387            if ctx.is_rule_disabled(self.name(), i + 1) {
388                result.push_str(lines[i]);
389                result.push('\n');
390                i += 1;
391                continue;
392            }
393
394            // Skip lines in contexts that shouldn't be processed
395            if let Some(line_info) = ctx.lines.get(i)
396                && (line_info.in_front_matter || line_info.in_html_comment || line_info.in_html_block)
397            {
398                result.push_str(lines[i]);
399                result.push('\n');
400                i += 1;
401                continue;
402            }
403
404            // Skip if we're already inside a code block (check previous line)
405            if i > 0
406                && let Some(prev_line_info) = ctx.lines.get(i - 1)
407                && prev_line_info.in_code_block
408            {
409                result.push_str(lines[i]);
410                result.push('\n');
411                i += 1;
412                continue;
413            }
414
415            let line = lines[i];
416
417            // Try to parse as opening fence
418            if let Some((indent, fence_char, fence_length, info_string)) = Self::parse_fence_line(line) {
419                let block_start = i;
420
421                // Extract the language
422                let language = info_string.split_whitespace().next().unwrap_or("");
423
424                // Find the first closing fence (what CommonMark sees)
425                let mut first_close = None;
426                for (j, line_j) in lines.iter().enumerate().skip(i + 1) {
427                    if Self::is_closing_fence(line_j, fence_char, fence_length) {
428                        first_close = Some(j);
429                        break;
430                    }
431                }
432
433                if let Some(end_line) = first_close {
434                    // Check if we should fix this block
435                    if Self::should_check_language(language) {
436                        // Get the content between fences
437                        let block_content: String = if block_start + 1 < end_line {
438                            lines[(block_start + 1)..end_line].join("\n")
439                        } else {
440                            String::new()
441                        };
442
443                        // Check for fence collision
444                        if Self::find_fence_collision(&block_content, fence_char, fence_length).is_some() {
445                            let intended_close =
446                                Self::find_intended_close(lines, end_line, fence_char, fence_length, indent);
447
448                            // Get content between opening and intended close
449                            let full_block_content: String = if block_start + 1 < intended_close {
450                                lines[(block_start + 1)..intended_close].join("\n")
451                            } else {
452                                String::new()
453                            };
454
455                            let safe_length = Self::find_safe_fence_length(&full_block_content, fence_char) + 1;
456                            let suggested_fence: String = std::iter::repeat_n(fence_char, safe_length).collect();
457
458                            // Write fixed opening fence
459                            let opening_indent = " ".repeat(indent);
460                            result.push_str(&format!("{opening_indent}{suggested_fence}{info_string}\n"));
461
462                            // Write content
463                            for line_content in &lines[(block_start + 1)..intended_close] {
464                                result.push_str(line_content);
465                                result.push('\n');
466                            }
467
468                            // Write fixed closing fence
469                            let closing_line = lines[intended_close];
470                            let closing_indent = closing_line.len() - closing_line.trim_start().len();
471                            let closing_indent_str = " ".repeat(closing_indent);
472                            result.push_str(&format!("{closing_indent_str}{suggested_fence}\n"));
473
474                            i = intended_close + 1;
475                            continue;
476                        }
477                    }
478
479                    // No collision or not a checked language - preserve as-is
480                    for line_content in &lines[block_start..=end_line] {
481                        result.push_str(line_content);
482                        result.push('\n');
483                    }
484                    i = end_line + 1;
485                    continue;
486                }
487            }
488
489            // Not a fence line, preserve as-is
490            result.push_str(line);
491            result.push('\n');
492            i += 1;
493        }
494
495        // Remove trailing newline if original didn't have one
496        if !content.ends_with('\n') && result.ends_with('\n') {
497            result.pop();
498        }
499
500        Ok(result)
501    }
502
    /// This rule is grouped with the code-block rules.
    fn category(&self) -> RuleCategory {
        RuleCategory::CodeBlock
    }
506
507    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
508        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
509    }
510
    /// Expose the rule as `&dyn Any` so callers holding a trait object can
    /// downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
514
    /// Build the rule as a boxed trait object. The rule has no options, so
    /// `_config` is ignored.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        Box::new(MD070NestedCodeFence::new())
    }
521}
522
523#[cfg(test)]
524mod tests {
525    use super::*;
526    use crate::lint_context::LintContext;
527
528    fn run_check(content: &str) -> LintResult {
529        let rule = MD070NestedCodeFence::new();
530        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
531        rule.check(&ctx)
532    }
533
534    fn run_fix(content: &str) -> Result<String, LintError> {
535        let rule = MD070NestedCodeFence::new();
536        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
537        rule.fix(&ctx)
538    }
539
540    #[test]
541    fn test_no_collision_simple() {
542        let content = "```python\nprint('hello')\n```\n";
543        let result = run_check(content).unwrap();
544        assert!(result.is_empty(), "Simple code block should not trigger warning");
545    }
546
547    #[test]
548    fn test_no_collision_unchecked_language() {
549        // C is not checked for nested fences (triple backticks don't appear in C source)
550        let content = "```c\n```bash\necho hello\n```\n```\n";
551        let result = run_check(content).unwrap();
552        assert!(result.is_empty(), "Unchecked language should not trigger");
553    }
554
555    #[test]
556    fn test_collision_python_language() {
557        // Python is checked — triple-quoted strings commonly contain markdown
558        let content = "```python\n```json\n{}\n```\n```\n";
559        let result = run_check(content).unwrap();
560        assert_eq!(result.len(), 1, "Python should be checked for nested fences");
561        assert!(result[0].message.contains("````"));
562    }
563
564    #[test]
565    fn test_collision_javascript_language() {
566        let content = "```javascript\n```html\n<div></div>\n```\n```\n";
567        let result = run_check(content).unwrap();
568        assert_eq!(result.len(), 1, "JavaScript should be checked for nested fences");
569    }
570
571    #[test]
572    fn test_collision_shell_language() {
573        let content = "```bash\n```yaml\nkey: val\n```\n```\n";
574        let result = run_check(content).unwrap();
575        assert_eq!(result.len(), 1, "Shell should be checked for nested fences");
576    }
577
578    #[test]
579    fn test_collision_rust_language() {
580        let content = "```rust\n```toml\n[dep]\n```\n```\n";
581        let result = run_check(content).unwrap();
582        assert_eq!(result.len(), 1, "Rust should be checked for nested fences");
583    }
584
585    #[test]
586    fn test_no_collision_assembly_language() {
587        // Assembly, C, SQL etc. should NOT be checked
588        for lang in ["asm", "c", "cpp", "sql", "css", "fortran"] {
589            let content = format!("```{lang}\n```inner\ncontent\n```\n```\n");
590            let result = run_check(&content).unwrap();
591            assert!(result.is_empty(), "{lang} should not be checked for nested fences");
592        }
593    }
594
595    #[test]
596    fn test_collision_markdown_language() {
597        let content = "```markdown\n```python\ncode()\n```\n```\n";
598        let result = run_check(content).unwrap();
599        assert_eq!(result.len(), 1, "Should emit single warning for collision");
600        assert!(result[0].message.contains("fence markers at line"));
601        assert!(result[0].message.contains("interfere with block parsing"));
602        assert!(result[0].message.contains("use ````"));
603    }
604
605    #[test]
606    fn test_collision_empty_language() {
607        // Empty language (no language specified) is checked
608        let content = "```\n```python\ncode()\n```\n```\n";
609        let result = run_check(content).unwrap();
610        assert_eq!(result.len(), 1, "Empty language should be checked");
611    }
612
613    #[test]
614    fn test_no_collision_longer_outer_fence() {
615        let content = "````markdown\n```python\ncode()\n```\n````\n";
616        let result = run_check(content).unwrap();
617        assert!(result.is_empty(), "Longer outer fence should not trigger warning");
618    }
619
620    #[test]
621    fn test_tilde_fence_ignores_backticks() {
622        // Tildes and backticks don't conflict
623        let content = "~~~markdown\n```python\ncode()\n```\n~~~\n";
624        let result = run_check(content).unwrap();
625        assert!(result.is_empty(), "Different fence types should not collide");
626    }
627
628    #[test]
629    fn test_tilde_collision() {
630        let content = "~~~markdown\n~~~python\ncode()\n~~~\n~~~\n";
631        let result = run_check(content).unwrap();
632        assert_eq!(result.len(), 1, "Same fence type should collide");
633        assert!(result[0].message.contains("~~~~"));
634    }
635
636    #[test]
637    fn test_fix_increases_fence_length() {
638        let content = "```markdown\n```python\ncode()\n```\n```\n";
639        let fixed = run_fix(content).unwrap();
640        assert!(fixed.starts_with("````markdown"), "Should increase to 4 backticks");
641        assert!(
642            fixed.contains("````\n") || fixed.ends_with("````"),
643            "Closing should also be 4 backticks"
644        );
645    }
646
647    #[test]
648    fn test_fix_handles_longer_inner_fence() {
649        // Inner fence has 5 backticks, so outer needs 6
650        let content = "```markdown\n`````python\ncode()\n`````\n```\n";
651        let fixed = run_fix(content).unwrap();
652        assert!(fixed.starts_with("``````markdown"), "Should increase to 6 backticks");
653    }
654
655    #[test]
656    fn test_backticks_in_code_not_fence() {
657        // Template literals in JS shouldn't trigger
658        let content = "```markdown\nconst x = `template`;\n```\n";
659        let result = run_check(content).unwrap();
660        assert!(result.is_empty(), "Inline backticks should not be detected as fences");
661    }
662
663    #[test]
664    fn test_preserves_info_string() {
665        let content = "```markdown {.highlight}\n```python\ncode()\n```\n```\n";
666        let fixed = run_fix(content).unwrap();
667        assert!(
668            fixed.contains("````markdown {.highlight}"),
669            "Should preserve info string attributes"
670        );
671    }
672
673    #[test]
674    fn test_md_language_alias() {
675        let content = "```md\n```python\ncode()\n```\n```\n";
676        let result = run_check(content).unwrap();
677        assert_eq!(result.len(), 1, "md should be recognized as markdown");
678    }
679
680    #[test]
681    fn test_real_world_docs_case() {
682        // This is the actual pattern from docs/md031.md that triggered the PR
683        let content = r#"```markdown
6841. First item
685
686   ```python
687   code_in_list()
688   ```
689
6901. Second item
691
692```
693"#;
694        let result = run_check(content).unwrap();
695        assert_eq!(result.len(), 1, "Should emit single warning for nested fence issue");
696        assert!(result[0].message.contains("line 4")); // The nested ``` is on line 4
697
698        let fixed = run_fix(content).unwrap();
699        assert!(fixed.starts_with("````markdown"), "Should fix with longer fence");
700    }
701
702    #[test]
703    fn test_empty_code_block() {
704        let content = "```markdown\n```\n";
705        let result = run_check(content).unwrap();
706        assert!(result.is_empty(), "Empty code block should not trigger");
707    }
708
709    #[test]
710    fn test_multiple_code_blocks() {
711        // The markdown block has a collision (inner ```python closes it prematurely).
712        // The orphan closing fence (line 9) is NOT treated as a new opening fence
713        // because the context correctly detects it as part of the markdown block.
714        let content = r#"```python
715safe code
716```
717
718```markdown
719```python
720collision
721```
722```
723
724```javascript
725also safe
726```
727"#;
728        let result = run_check(content).unwrap();
729        // Only 1 warning for the markdown block collision.
730        // The orphan fence is correctly ignored (not parsed as new opening fence).
731        assert_eq!(result.len(), 1, "Should emit single warning for collision");
732        assert!(result[0].message.contains("line 6")); // The nested ```python is on line 6
733    }
734
735    #[test]
736    fn test_single_collision_properly_closed() {
737        // When the outer fence is properly longer, only the intended block triggers
738        let content = r#"```python
739safe code
740```
741
742````markdown
743```python
744collision
745```
746````
747
748```javascript
749also safe
750```
751"#;
752        let result = run_check(content).unwrap();
753        assert!(result.is_empty(), "Properly fenced blocks should not trigger");
754    }
755
756    #[test]
757    fn test_indented_code_block_in_list() {
758        let content = r#"- List item
759  ```markdown
760  ```python
761  nested
762  ```
763  ```
764"#;
765        let result = run_check(content).unwrap();
766        assert_eq!(result.len(), 1, "Should detect collision in indented block");
767        assert!(result[0].message.contains("````"));
768    }
769
770    #[test]
771    fn test_no_false_positive_list_indented_block() {
772        // 4-space indented code blocks in list context (GFM extension) should not
773        // cause false positives. The closing fence with 3-space indent should not
774        // be parsed as a new opening fence.
775        let content = r#"1. List item with code:
776
777    ```json
778    {"key": "value"}
779    ```
780
7812. Another item
782
783   ```python
784   code()
785   ```
786"#;
787        let result = run_check(content).unwrap();
788        // No collision - these are separate, well-formed code blocks
789        assert!(
790            result.is_empty(),
791            "List-indented code blocks should not trigger false positives"
792        );
793    }
794
795    // ==================== Comprehensive Edge Case Tests ====================
796
797    #[test]
798    fn test_case_insensitive_language() {
799        // MARKDOWN, Markdown, MD should all be checked
800        for lang in ["MARKDOWN", "Markdown", "MD", "Md", "mD"] {
801            let content = format!("```{lang}\n```python\ncode()\n```\n```\n");
802            let result = run_check(&content).unwrap();
803            assert_eq!(result.len(), 1, "{lang} should be recognized as markdown");
804        }
805    }
806
807    #[test]
808    fn test_unclosed_outer_fence() {
809        // If outer fence is never closed, no collision can be detected
810        let content = "```markdown\n```python\ncode()\n```\n";
811        let result = run_check(content).unwrap();
812        // The outer fence finds ```python as its closing fence (premature close)
813        // Then ```\n at the end becomes orphan - but context would handle this
814        assert!(result.len() <= 1, "Unclosed fence should not cause issues");
815    }
816
817    #[test]
818    fn test_deeply_nested_fences() {
819        // Multiple levels of nesting require progressively longer fences
820        let content = r#"```markdown
821````markdown
822```python
823code()
824```
825````
826```
827"#;
828        let result = run_check(content).unwrap();
829        // The outer ``` sees ```` as collision (4 >= 3)
830        assert_eq!(result.len(), 1, "Deep nesting should trigger warning");
831        assert!(result[0].message.contains("`````")); // Needs 5 to be safe
832    }
833
834    #[test]
835    fn test_very_long_fences() {
836        // 10 backtick fences should work correctly
837        let content = "``````````markdown\n```python\ncode()\n```\n``````````\n";
838        let result = run_check(content).unwrap();
839        assert!(result.is_empty(), "Very long outer fence should not trigger warning");
840    }
841
842    #[test]
843    fn test_blockquote_with_fence() {
844        // Fences inside blockquotes (CommonMark allows this)
845        let content = "> ```markdown\n> ```python\n> code()\n> ```\n> ```\n";
846        let result = run_check(content).unwrap();
847        // Blockquote prefixes are part of the line, so parsing may differ
848        // This documents current behavior
849        assert!(result.is_empty() || result.len() == 1);
850    }
851
852    #[test]
853    fn test_fence_with_attributes() {
854        // Info string with attributes like {.class #id}
855        let content = "```markdown {.highlight #example}\n```python\ncode()\n```\n```\n";
856        let result = run_check(content).unwrap();
857        assert_eq!(
858            result.len(),
859            1,
860            "Attributes in info string should not prevent detection"
861        );
862
863        let fixed = run_fix(content).unwrap();
864        assert!(
865            fixed.contains("````markdown {.highlight #example}"),
866            "Attributes should be preserved in fix"
867        );
868    }
869
870    #[test]
871    fn test_trailing_whitespace_in_info_string() {
872        let content = "```markdown   \n```python\ncode()\n```\n```\n";
873        let result = run_check(content).unwrap();
874        assert_eq!(result.len(), 1, "Trailing whitespace should not affect detection");
875    }
876
877    #[test]
878    fn test_only_closing_fence_pattern() {
879        // Content that has only closing fence patterns (no language)
880        let content = "```markdown\nsome text\n```\nmore text\n```\n";
881        let result = run_check(content).unwrap();
882        // The first ``` closes, second ``` is outside
883        assert!(result.is_empty(), "Properly closed block should not trigger");
884    }
885
886    #[test]
887    fn test_fence_at_end_of_file_no_newline() {
888        let content = "```markdown\n```python\ncode()\n```\n```";
889        let result = run_check(content).unwrap();
890        assert_eq!(result.len(), 1, "Should detect collision even without trailing newline");
891
892        let fixed = run_fix(content).unwrap();
893        assert!(!fixed.ends_with('\n'), "Should preserve lack of trailing newline");
894    }
895
896    #[test]
897    fn test_empty_lines_between_fences() {
898        let content = "```markdown\n\n\n```python\n\ncode()\n\n```\n\n```\n";
899        let result = run_check(content).unwrap();
900        assert_eq!(result.len(), 1, "Empty lines should not affect collision detection");
901    }
902
903    #[test]
904    fn test_tab_indented_opening_fence() {
905        // Tab at start of line - CommonMark says tab = 4 spaces for indentation.
906        // A 4-space indented fence is NOT a valid fenced code block per CommonMark
907        // (only 0-3 spaces allowed). However, our implementation counts characters,
908        // treating tab as 1 character. This means tab-indented fences ARE parsed.
909        // This is intentional: consistent with other rules in rumdl and matches
910        // common editor behavior where tab = 1 indent level.
911        let content = "\t```markdown\n```python\ncode()\n```\n```\n";
912        let result = run_check(content).unwrap();
913        // With tab treated as 1 char (< 3), this IS parsed as a fence and triggers collision
914        assert_eq!(result.len(), 1, "Tab-indented fence is parsed (tab = 1 char)");
915    }
916
917    #[test]
918    fn test_mixed_fence_types_no_collision() {
919        // Backticks outer, tildes inner - should never collide
920        let content = "```markdown\n~~~python\ncode()\n~~~\n```\n";
921        let result = run_check(content).unwrap();
922        assert!(result.is_empty(), "Different fence chars should not collide");
923
924        // Tildes outer, backticks inner
925        let content2 = "~~~markdown\n```python\ncode()\n```\n~~~\n";
926        let result2 = run_check(content2).unwrap();
927        assert!(result2.is_empty(), "Different fence chars should not collide");
928    }
929
930    #[test]
931    fn test_frontmatter_not_confused_with_fence() {
932        // YAML frontmatter uses --- which shouldn't be confused with fences
933        let content = "---\ntitle: Test\n---\n\n```markdown\n```python\ncode()\n```\n```\n";
934        let result = run_check(content).unwrap();
935        assert_eq!(result.len(), 1, "Should detect collision after frontmatter");
936    }
937
938    #[test]
939    fn test_html_comment_with_fence_inside() {
940        // Fences inside HTML comments should be ignored
941        let content = "<!-- ```markdown\n```python\ncode()\n``` -->\n\n```markdown\nreal content\n```\n";
942        let result = run_check(content).unwrap();
943        // The fences inside HTML comment should be skipped
944        assert!(result.is_empty(), "Fences in HTML comments should be ignored");
945    }
946
947    #[test]
948    fn test_consecutive_code_blocks() {
949        // Multiple consecutive markdown blocks, each with collision
950        let content = r#"```markdown
951```python
952a()
953```
954```
955
956```markdown
957```ruby
958b()
959```
960```
961"#;
962        let result = run_check(content).unwrap();
963        // Each markdown block has its own collision
964        assert!(!result.is_empty(), "Should detect collision in first block");
965    }
966
967    #[test]
968    fn test_numeric_info_string() {
969        // Numbers after fence - some parsers treat this differently
970        let content = "```123\n```456\ncode()\n```\n```\n";
971        let result = run_check(content).unwrap();
972        // "123" is not "markdown" or "md", so should not check
973        assert!(result.is_empty(), "Numeric info string is not markdown");
974    }
975
976    #[test]
977    fn test_collision_at_exact_length() {
978        // An empty ``` is the closing fence, not a collision.
979        // For a collision, the inner fence must have content that looks like an opening fence.
980        let content = "```markdown\n```python\ncode()\n```\n```\n";
981        let result = run_check(content).unwrap();
982        assert_eq!(
983            result.len(),
984            1,
985            "Same-length fence with language should trigger collision"
986        );
987
988        // Inner fence one shorter than outer - not a collision
989        let content2 = "````markdown\n```python\ncode()\n```\n````\n";
990        let result2 = run_check(content2).unwrap();
991        assert!(result2.is_empty(), "Shorter inner fence should not collide");
992
993        // Empty markdown block followed by another fence - not a collision
994        let content3 = "```markdown\n```\n";
995        let result3 = run_check(content3).unwrap();
996        assert!(result3.is_empty(), "Empty closing fence is not a collision");
997    }
998
999    #[test]
1000    fn test_fix_preserves_content_exactly() {
1001        // Fix should not modify the content between fences
1002        let content = "```markdown\n```python\n  indented\n\ttabbed\nspecial: !@#$%\n```\n```\n";
1003        let fixed = run_fix(content).unwrap();
1004        assert!(fixed.contains("  indented"), "Indentation should be preserved");
1005        assert!(fixed.contains("\ttabbed"), "Tabs should be preserved");
1006        assert!(fixed.contains("special: !@#$%"), "Special chars should be preserved");
1007    }
1008
1009    #[test]
1010    fn test_warning_line_numbers_accurate() {
1011        let content = "# Title\n\nParagraph\n\n```markdown\n```python\ncode()\n```\n```\n";
1012        let result = run_check(content).unwrap();
1013        assert_eq!(result.len(), 1);
1014        assert_eq!(result[0].line, 5, "Warning should be on opening fence line");
1015        assert!(result[0].message.contains("line 6"), "Collision line should be line 6");
1016    }
1017
1018    #[test]
1019    fn test_should_skip_optimization() {
1020        let rule = MD070NestedCodeFence::new();
1021
1022        // No code-like content
1023        let ctx1 = LintContext::new("Just plain text", crate::config::MarkdownFlavor::Standard, None);
1024        assert!(
1025            rule.should_skip(&ctx1),
1026            "Should skip content without backticks or tildes"
1027        );
1028
1029        // Has backticks
1030        let ctx2 = LintContext::new("Has `code`", crate::config::MarkdownFlavor::Standard, None);
1031        assert!(!rule.should_skip(&ctx2), "Should not skip content with backticks");
1032
1033        // Has tildes
1034        let ctx3 = LintContext::new("Has ~~~", crate::config::MarkdownFlavor::Standard, None);
1035        assert!(!rule.should_skip(&ctx3), "Should not skip content with tildes");
1036
1037        // Empty
1038        let ctx4 = LintContext::new("", crate::config::MarkdownFlavor::Standard, None);
1039        assert!(rule.should_skip(&ctx4), "Should skip empty content");
1040    }
1041
1042    #[test]
1043    fn test_python_triplestring_fence_collision_fix() {
1044        // Reproduces GitHub issue #518: Python triple-quoted strings with embedded
1045        // markdown cause premature fence closure
1046        let content = "# Test\n\n```python\ndef f():\n    text = \"\"\"\n```json\n{}\n```\n\"\"\"\n```\n";
1047        let result = run_check(content).unwrap();
1048        assert_eq!(result.len(), 1, "Should detect collision in python block");
1049        assert!(result[0].fix.is_some(), "Warning should be marked as fixable");
1050
1051        let fixed = run_fix(content).unwrap();
1052        assert!(
1053            fixed.contains("````python"),
1054            "Should upgrade opening fence to 4 backticks"
1055        );
1056        assert!(
1057            fixed.contains("````\n") || fixed.ends_with("````"),
1058            "Should upgrade closing fence to 4 backticks"
1059        );
1060        // Content between fences should be preserved
1061        assert!(fixed.contains("```json"), "Inner fences should be preserved as content");
1062    }
1063
1064    #[test]
1065    fn test_warning_is_fixable() {
1066        // All MD070 warnings must have fix.is_some() so the fix coordinator calls fix()
1067        let content = "```markdown\n```python\ncode()\n```\n```\n";
1068        let result = run_check(content).unwrap();
1069        assert_eq!(result.len(), 1);
1070        assert!(
1071            result[0].fix.is_some(),
1072            "MD070 warnings must be marked fixable for the fix coordinator"
1073        );
1074    }
1075
1076    #[test]
1077    fn test_fix_via_warning_struct_is_safe() {
1078        // The Fix on warnings is used directly by the LSP code action path.
1079        // It must produce valid output (not delete the fence or corrupt the file).
1080        let content = "```markdown\n```python\ncode()\n```\n```\n";
1081        let result = run_check(content).unwrap();
1082        assert_eq!(result.len(), 1);
1083
1084        let fix = result[0].fix.as_ref().unwrap();
1085        // Apply the Fix directly (simulating LSP path)
1086        let mut fixed = String::new();
1087        fixed.push_str(&content[..fix.range.start]);
1088        fixed.push_str(&fix.replacement);
1089        fixed.push_str(&content[fix.range.end..]);
1090
1091        // The fixed content should have upgraded fences
1092        assert!(
1093            fixed.contains("````markdown"),
1094            "Direct Fix application should upgrade opening fence, got: {fixed}"
1095        );
1096        assert!(
1097            fixed.contains("````\n") || fixed.ends_with("````"),
1098            "Direct Fix application should upgrade closing fence, got: {fixed}"
1099        );
1100        // Content should be preserved
1101        assert!(
1102            fixed.contains("```python"),
1103            "Inner content should be preserved, got: {fixed}"
1104        );
1105    }
1106
1107    #[test]
1108    fn test_fix_via_warning_struct_python_block() {
1109        // Test the LSP code action path for a Python block where CommonMark's
1110        // closing fence differs from the user's intended closing fence.
1111        // CommonMark sees: ```python (line 1) closed by bare ``` (line 6).
1112        // User intended: ```python (line 1) closed by ``` (line 10).
1113        let content = "```python\ndef f():\n    text = \"\"\"\n```json\n{}\n```\n\"\"\"\n    print(text)\nf()\n```\n";
1114        let result = run_check(content).unwrap();
1115        assert_eq!(result.len(), 1);
1116
1117        let fix = result[0].fix.as_ref().unwrap();
1118        let mut fixed = String::new();
1119        fixed.push_str(&content[..fix.range.start]);
1120        fixed.push_str(&fix.replacement);
1121        fixed.push_str(&content[fix.range.end..]);
1122
1123        // The Fix must cover the full intended block (lines 1-10), not just
1124        // the CommonMark-visible block (lines 1-6). Verify the fixed content
1125        // has one code block containing ALL the Python code.
1126        assert!(
1127            fixed.starts_with("````python\n"),
1128            "Should upgrade opening fence, got:\n{fixed}"
1129        );
1130        assert!(
1131            fixed.contains("````\n") || fixed.trim_end().ends_with("````"),
1132            "Should upgrade closing fence, got:\n{fixed}"
1133        );
1134        // ALL Python code must be between the fences
1135        let fence_start = fixed.find("````python\n").unwrap();
1136        let after_open = fence_start + "````python\n".len();
1137        let close_pos = fixed[after_open..]
1138            .find("\n````\n")
1139            .or_else(|| fixed[after_open..].find("\n````"));
1140        assert!(
1141            close_pos.is_some(),
1142            "Should have closing fence after content, got:\n{fixed}"
1143        );
1144        let block_content = &fixed[after_open..after_open + close_pos.unwrap()];
1145        assert!(
1146            block_content.contains("print(text)"),
1147            "print(text) must be inside the code block, got block:\n{block_content}"
1148        );
1149        assert!(
1150            block_content.contains("f()"),
1151            "f() must be inside the code block, got block:\n{block_content}"
1152        );
1153        assert!(
1154            block_content.contains("```json"),
1155            "Inner fences must be preserved as content, got block:\n{block_content}"
1156        );
1157    }
1158
1159    #[test]
1160    fn test_fix_via_apply_warning_fixes() {
1161        // End-to-end test of the LSP fix path using apply_warning_fixes
1162        let content = "```markdown\n```python\ncode()\n```\n```\n";
1163        let result = run_check(content).unwrap();
1164        assert_eq!(result.len(), 1);
1165
1166        let fixed = crate::utils::fix_utils::apply_warning_fixes(content, &result).unwrap();
1167        assert!(
1168            fixed.contains("````markdown"),
1169            "apply_warning_fixes should upgrade opening fence"
1170        );
1171        assert!(
1172            fixed.contains("````\n") || fixed.ends_with("````"),
1173            "apply_warning_fixes should upgrade closing fence"
1174        );
1175
1176        // Re-check should find no issues
1177        let ctx2 = LintContext::new(&fixed, crate::config::MarkdownFlavor::Standard, None);
1178        let rule = MD070NestedCodeFence::new();
1179        let result2 = rule.check(&ctx2).unwrap();
1180        assert!(
1181            result2.is_empty(),
1182            "Re-check after LSP fix should find no issues, got: {:?}",
1183            result2.iter().map(|w| &w.message).collect::<Vec<_>>()
1184        );
1185    }
1186}