rumdl_lib/rules/
md070_nested_code_fence.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2
/// MD070 — nested code fence collisions.
///
/// Flags fenced code blocks whose content contains fence markers that would
/// end the block earlier than intended, and suggests a longer outer fence.
///
/// Only blocks tagged with a language in which triple-backtick sequences
/// commonly show up are inspected: markdown, Python, JavaScript, shell, Rust,
/// Go, and other languages with multiline strings, heredocs, template
/// literals, or doc comments.
///
/// See [docs/md070.md](../../docs/md070.md) for full documentation.
#[derive(Default, Clone)]
pub struct MD070NestedCodeFence;
15
16impl MD070NestedCodeFence {
17    pub fn new() -> Self {
18        Self
19    }
20
21    /// Check if the given language should be checked for nested fences.
22    /// Covers languages where triple backtick sequences commonly appear in source:
23    /// multiline strings with embedded markdown, heredocs, doc comments, template
24    /// literals, and data formats with multiline string values.
25    fn should_check_language(lang: &str) -> bool {
26        let base = lang.split_whitespace().next().unwrap_or("");
27        matches!(
28            base.to_ascii_lowercase().as_str(),
29            // Documentation / markup
30            ""
31                | "markdown"
32                | "md"
33                | "mdx"
34                | "text"
35                | "txt"
36                | "plain"
37                // Multiline strings / docstrings
38                | "python"
39                | "py"
40                | "ruby"
41                | "rb"
42                | "perl"
43                | "pl"
44                | "php"
45                | "lua"
46                | "r"
47                | "rmd"
48                | "rmarkdown"
49                // Template literals / raw strings
50                | "javascript"
51                | "js"
52                | "jsx"
53                | "mjs"
54                | "cjs"
55                | "typescript"
56                | "ts"
57                | "tsx"
58                | "mts"
59                | "rust"
60                | "rs"
61                | "go"
62                | "golang"
63                | "swift"
64                | "kotlin"
65                | "kt"
66                | "kts"
67                | "java"
68                | "csharp"
69                | "cs"
70                | "c#"
71                | "scala"
72                // Shell heredocs
73                | "shell"
74                | "sh"
75                | "bash"
76                | "zsh"
77                | "fish"
78                | "powershell"
79                | "ps1"
80                | "pwsh"
81                // Data / config formats
82                | "yaml"
83                | "yml"
84                | "toml"
85                | "json"
86                | "jsonc"
87                | "json5"
88                // Template engines
89                | "jinja"
90                | "jinja2"
91                | "handlebars"
92                | "hbs"
93                | "liquid"
94                | "nunjucks"
95                | "njk"
96                | "ejs"
97                // Terminal output
98                | "console"
99                | "terminal"
100        )
101    }
102
103    /// Find the maximum fence length of same-character fences in the content
104    /// Returns (line_offset, fence_length) of the first collision, if any
105    fn find_fence_collision(content: &str, fence_char: char, outer_fence_length: usize) -> Option<(usize, usize)> {
106        for (line_idx, line) in content.lines().enumerate() {
107            let trimmed = line.trim_start();
108
109            // Check if line starts with the same fence character
110            if trimmed.starts_with(fence_char) {
111                let count = trimmed.chars().take_while(|&c| c == fence_char).count();
112
113                // Collision if same char AND at least as long as outer fence
114                if count >= outer_fence_length {
115                    // Verify it looks like a fence line (only fence chars + optional language/whitespace)
116                    let after_fence = &trimmed[count..];
117                    // A fence line is: fence chars + optional language identifier + optional whitespace
118                    // We detect collision if:
119                    // - Line ends after fence chars (closing fence)
120                    // - Line has alphanumeric after fence (opening fence with language)
121                    // - Line has only whitespace after fence
122                    if after_fence.is_empty()
123                        || after_fence.trim().is_empty()
124                        || after_fence
125                            .chars()
126                            .next()
127                            .is_some_and(|c| c.is_alphabetic() || c == '{')
128                    {
129                        return Some((line_idx, count));
130                    }
131                }
132            }
133        }
134        None
135    }
136
137    /// Find the maximum fence length needed to safely contain the content
138    fn find_safe_fence_length(content: &str, fence_char: char) -> usize {
139        let mut max_fence = 0;
140
141        for line in content.lines() {
142            let trimmed = line.trim_start();
143            if trimmed.starts_with(fence_char) {
144                let count = trimmed.chars().take_while(|&c| c == fence_char).count();
145                if count >= 3 {
146                    // Only count valid fence-like patterns
147                    let after_fence = &trimmed[count..];
148                    if after_fence.is_empty()
149                        || after_fence.trim().is_empty()
150                        || after_fence
151                            .chars()
152                            .next()
153                            .is_some_and(|c| c.is_alphabetic() || c == '{')
154                    {
155                        max_fence = max_fence.max(count);
156                    }
157                }
158            }
159        }
160
161        max_fence
162    }
163
164    /// Find the user's intended closing fence when a collision is detected.
165    /// Searches past the first (premature) closing fence for the last bare
166    /// fence of the same type before hitting a new opening fence.
167    fn find_intended_close(
168        lines: &[&str],
169        first_close: usize,
170        fence_char: char,
171        fence_length: usize,
172        opening_indent: usize,
173    ) -> usize {
174        let mut intended_close = first_close;
175        for (j, line_j) in lines.iter().enumerate().skip(first_close + 1) {
176            if Self::is_closing_fence(line_j, fence_char, fence_length) {
177                intended_close = j;
178            } else if Self::parse_fence_line(line_j)
179                .is_some_and(|(ind, ch, _, info)| ind <= opening_indent && ch == fence_char && !info.is_empty())
180            {
181                break;
182            }
183        }
184        intended_close
185    }
186
187    /// Parse a fence marker from a line, returning (indent, fence_char, fence_length, info_string)
188    fn parse_fence_line(line: &str) -> Option<(usize, char, usize, &str)> {
189        let indent = line.len() - line.trim_start().len();
190        // Per CommonMark, fence must have 0-3 spaces of indentation
191        if indent > 3 {
192            return None;
193        }
194
195        let trimmed = line.trim_start();
196
197        if trimmed.starts_with("```") {
198            let count = trimmed.chars().take_while(|&c| c == '`').count();
199            if count >= 3 {
200                let info = trimmed[count..].trim();
201                return Some((indent, '`', count, info));
202            }
203        } else if trimmed.starts_with("~~~") {
204            let count = trimmed.chars().take_while(|&c| c == '~').count();
205            if count >= 3 {
206                let info = trimmed[count..].trim();
207                return Some((indent, '~', count, info));
208            }
209        }
210
211        None
212    }
213
214    /// Check if a line is a valid closing fence for the given opening fence
215    /// Per CommonMark, closing fences can have 0-3 spaces of indentation regardless of opening fence
216    fn is_closing_fence(line: &str, fence_char: char, min_length: usize) -> bool {
217        let indent = line.len() - line.trim_start().len();
218        // Per CommonMark spec, closing fence can have 0-3 spaces of indentation
219        if indent > 3 {
220            return false;
221        }
222
223        let trimmed = line.trim_start();
224        if !trimmed.starts_with(fence_char) {
225            return false;
226        }
227
228        let count = trimmed.chars().take_while(|&c| c == fence_char).count();
229        if count < min_length {
230            return false;
231        }
232
233        // Closing fence must have only whitespace after fence chars
234        trimmed[count..].trim().is_empty()
235    }
236}
237
238impl Rule for MD070NestedCodeFence {
239    fn name(&self) -> &'static str {
240        "MD070"
241    }
242
243    fn description(&self) -> &'static str {
244        "Nested code fence collision - use longer fence to avoid premature closure"
245    }
246
247    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
248        let mut warnings = Vec::new();
249        let lines = ctx.raw_lines();
250
251        let mut i = 0;
252        while i < lines.len() {
253            // Skip lines in contexts that shouldn't be processed
254            if let Some(line_info) = ctx.lines.get(i)
255                && (line_info.in_front_matter
256                    || line_info.in_html_comment
257                    || line_info.in_mdx_comment
258                    || line_info.in_html_block)
259            {
260                i += 1;
261                continue;
262            }
263
264            // Skip if we're already inside a code block (check previous line).
265            // This handles list-indented code blocks (4+ spaces) which our rule doesn't
266            // parse directly, but the context detects correctly. If the previous line
267            // is in a code block, this line is either content or a closing fence for
268            // that block - not a new opening fence.
269            if i > 0
270                && let Some(prev_line_info) = ctx.lines.get(i - 1)
271                && prev_line_info.in_code_block
272            {
273                i += 1;
274                continue;
275            }
276
277            let line = lines[i];
278
279            // Try to parse as opening fence
280            if let Some((_indent, fence_char, fence_length, info_string)) = Self::parse_fence_line(line) {
281                let block_start = i;
282
283                // Extract the language (first word of info string)
284                let language = info_string.split_whitespace().next().unwrap_or("");
285
286                // Find the closing fence
287                let mut block_end = None;
288                for (j, line_j) in lines.iter().enumerate().skip(i + 1) {
289                    if Self::is_closing_fence(line_j, fence_char, fence_length) {
290                        block_end = Some(j);
291                        break;
292                    }
293                }
294
295                if let Some(end_line) = block_end {
296                    // We have a complete code block from block_start to end_line
297                    // Check if we should analyze this block
298                    if Self::should_check_language(language) {
299                        // Get the content between fences
300                        let block_content: String = if block_start + 1 < end_line {
301                            lines[(block_start + 1)..end_line].join("\n")
302                        } else {
303                            String::new()
304                        };
305
306                        // Check for fence collision
307                        if let Some((collision_line_offset, _collision_length)) =
308                            Self::find_fence_collision(&block_content, fence_char, fence_length)
309                        {
310                            let collision_line_num = block_start + 1 + collision_line_offset + 1; // 1-indexed
311
312                            // Find the user's intended closing fence (may be past the
313                            // CommonMark-visible close when inner ``` causes premature closure)
314                            let indent = line.len() - line.trim_start().len();
315                            let intended_close =
316                                Self::find_intended_close(lines, end_line, fence_char, fence_length, indent);
317
318                            // Compute safe fence length from the full intended content
319                            let full_content: String = if block_start + 1 < intended_close {
320                                lines[(block_start + 1)..intended_close].join("\n")
321                            } else {
322                                block_content.clone()
323                            };
324                            let safe_length = Self::find_safe_fence_length(&full_content, fence_char) + 1;
325                            let suggested_fence: String = std::iter::repeat_n(fence_char, safe_length).collect();
326
327                            // Build a Fix that replaces the block from opening fence
328                            // through the intended closing fence. This must be safe for
329                            // direct application by the LSP code action path.
330                            let open_byte_start = ctx.line_index.get_line_start_byte(block_start + 1).unwrap_or(0);
331                            let close_byte_end = ctx
332                                .line_index
333                                .get_line_start_byte(intended_close + 2)
334                                .unwrap_or(ctx.content.len());
335
336                            let indent_str = &line[..indent];
337                            let closing_line = lines[intended_close];
338                            let closing_indent = &closing_line[..closing_line.len() - closing_line.trim_start().len()];
339                            let mut replacement = format!("{indent_str}{suggested_fence}");
340                            if !info_string.is_empty() {
341                                replacement.push_str(info_string);
342                            }
343                            replacement.push('\n');
344                            for content_line in &lines[(block_start + 1)..intended_close] {
345                                replacement.push_str(content_line);
346                                replacement.push('\n');
347                            }
348                            replacement.push_str(closing_indent);
349                            replacement.push_str(&suggested_fence);
350                            // Only add trailing newline if the replaced range ends with one
351                            if close_byte_end <= ctx.content.len() && ctx.content[..close_byte_end].ends_with('\n') {
352                                replacement.push('\n');
353                            }
354
355                            warnings.push(LintWarning {
356                                rule_name: Some(self.name().to_string()),
357                                message: format!(
358                                    "Code block contains fence markers at line {collision_line_num} that interfere with block parsing — use {suggested_fence} for outer fence"
359                                ),
360                                line: block_start + 1,
361                                column: 1,
362                                end_line: intended_close + 1,
363                                end_column: lines[intended_close].len() + 1,
364                                severity: Severity::Warning,
365                                fix: Some(Fix::new(open_byte_start..close_byte_end, replacement)),
366                            });
367                        }
368                    }
369
370                    // Move past this code block
371                    i = end_line + 1;
372                    continue;
373                }
374            }
375
376            i += 1;
377        }
378
379        Ok(warnings)
380    }
381
382    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
383        if self.should_skip(ctx) {
384            return Ok(ctx.content.to_string());
385        }
386        let warnings = self.check(ctx)?;
387        if warnings.is_empty() {
388            return Ok(ctx.content.to_string());
389        }
390        let warnings =
391            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
392        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::FixFailed)
393    }
394
395    fn category(&self) -> RuleCategory {
396        RuleCategory::CodeBlock
397    }
398
399    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
400        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
401    }
402
403    fn as_any(&self) -> &dyn std::any::Any {
404        self
405    }
406
407    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
408    where
409        Self: Sized,
410    {
411        Box::new(MD070NestedCodeFence::new())
412    }
413}
414
415#[cfg(test)]
416mod tests {
417    use super::*;
418    use crate::lint_context::LintContext;
419
420    fn run_check(content: &str) -> LintResult {
421        let rule = MD070NestedCodeFence::new();
422        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
423        rule.check(&ctx)
424    }
425
426    fn run_fix(content: &str) -> Result<String, LintError> {
427        let rule = MD070NestedCodeFence::new();
428        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
429        rule.fix(&ctx)
430    }
431
432    #[test]
433    fn test_no_collision_simple() {
434        let content = "```python\nprint('hello')\n```\n";
435        let result = run_check(content).unwrap();
436        assert!(result.is_empty(), "Simple code block should not trigger warning");
437    }
438
439    #[test]
440    fn test_no_collision_unchecked_language() {
441        // C is not checked for nested fences (triple backticks don't appear in C source)
442        let content = "```c\n```bash\necho hello\n```\n```\n";
443        let result = run_check(content).unwrap();
444        assert!(result.is_empty(), "Unchecked language should not trigger");
445    }
446
447    #[test]
448    fn test_collision_python_language() {
449        // Python is checked — triple-quoted strings commonly contain markdown
450        let content = "```python\n```json\n{}\n```\n```\n";
451        let result = run_check(content).unwrap();
452        assert_eq!(result.len(), 1, "Python should be checked for nested fences");
453        assert!(result[0].message.contains("````"));
454    }
455
456    #[test]
457    fn test_collision_javascript_language() {
458        let content = "```javascript\n```html\n<div></div>\n```\n```\n";
459        let result = run_check(content).unwrap();
460        assert_eq!(result.len(), 1, "JavaScript should be checked for nested fences");
461    }
462
463    #[test]
464    fn test_collision_shell_language() {
465        let content = "```bash\n```yaml\nkey: val\n```\n```\n";
466        let result = run_check(content).unwrap();
467        assert_eq!(result.len(), 1, "Shell should be checked for nested fences");
468    }
469
470    #[test]
471    fn test_collision_rust_language() {
472        let content = "```rust\n```toml\n[dep]\n```\n```\n";
473        let result = run_check(content).unwrap();
474        assert_eq!(result.len(), 1, "Rust should be checked for nested fences");
475    }
476
477    #[test]
478    fn test_no_collision_assembly_language() {
479        // Assembly, C, SQL etc. should NOT be checked
480        for lang in ["asm", "c", "cpp", "sql", "css", "fortran"] {
481            let content = format!("```{lang}\n```inner\ncontent\n```\n```\n");
482            let result = run_check(&content).unwrap();
483            assert!(result.is_empty(), "{lang} should not be checked for nested fences");
484        }
485    }
486
487    #[test]
488    fn test_collision_markdown_language() {
489        let content = "```markdown\n```python\ncode()\n```\n```\n";
490        let result = run_check(content).unwrap();
491        assert_eq!(result.len(), 1, "Should emit single warning for collision");
492        assert!(result[0].message.contains("fence markers at line"));
493        assert!(result[0].message.contains("interfere with block parsing"));
494        assert!(result[0].message.contains("use ````"));
495    }
496
497    #[test]
498    fn test_collision_empty_language() {
499        // Empty language (no language specified) is checked
500        let content = "```\n```python\ncode()\n```\n```\n";
501        let result = run_check(content).unwrap();
502        assert_eq!(result.len(), 1, "Empty language should be checked");
503    }
504
505    #[test]
506    fn test_no_collision_longer_outer_fence() {
507        let content = "````markdown\n```python\ncode()\n```\n````\n";
508        let result = run_check(content).unwrap();
509        assert!(result.is_empty(), "Longer outer fence should not trigger warning");
510    }
511
512    #[test]
513    fn test_tilde_fence_ignores_backticks() {
514        // Tildes and backticks don't conflict
515        let content = "~~~markdown\n```python\ncode()\n```\n~~~\n";
516        let result = run_check(content).unwrap();
517        assert!(result.is_empty(), "Different fence types should not collide");
518    }
519
520    #[test]
521    fn test_tilde_collision() {
522        let content = "~~~markdown\n~~~python\ncode()\n~~~\n~~~\n";
523        let result = run_check(content).unwrap();
524        assert_eq!(result.len(), 1, "Same fence type should collide");
525        assert!(result[0].message.contains("~~~~"));
526    }
527
528    #[test]
529    fn test_fix_increases_fence_length() {
530        let content = "```markdown\n```python\ncode()\n```\n```\n";
531        let fixed = run_fix(content).unwrap();
532        assert!(fixed.starts_with("````markdown"), "Should increase to 4 backticks");
533        assert!(
534            fixed.contains("````\n") || fixed.ends_with("````"),
535            "Closing should also be 4 backticks"
536        );
537    }
538
539    #[test]
540    fn test_fix_handles_longer_inner_fence() {
541        // Inner fence has 5 backticks, so outer needs 6
542        let content = "```markdown\n`````python\ncode()\n`````\n```\n";
543        let fixed = run_fix(content).unwrap();
544        assert!(fixed.starts_with("``````markdown"), "Should increase to 6 backticks");
545    }
546
547    #[test]
548    fn test_backticks_in_code_not_fence() {
549        // Template literals in JS shouldn't trigger
550        let content = "```markdown\nconst x = `template`;\n```\n";
551        let result = run_check(content).unwrap();
552        assert!(result.is_empty(), "Inline backticks should not be detected as fences");
553    }
554
555    #[test]
556    fn test_preserves_info_string() {
557        let content = "```markdown {.highlight}\n```python\ncode()\n```\n```\n";
558        let fixed = run_fix(content).unwrap();
559        assert!(
560            fixed.contains("````markdown {.highlight}"),
561            "Should preserve info string attributes"
562        );
563    }
564
565    #[test]
566    fn test_md_language_alias() {
567        let content = "```md\n```python\ncode()\n```\n```\n";
568        let result = run_check(content).unwrap();
569        assert_eq!(result.len(), 1, "md should be recognized as markdown");
570    }
571
572    #[test]
573    fn test_real_world_docs_case() {
574        // This is the actual pattern from docs/md031.md that triggered the PR
575        let content = r#"```markdown
5761. First item
577
578   ```python
579   code_in_list()
580   ```
581
5821. Second item
583
584```
585"#;
586        let result = run_check(content).unwrap();
587        assert_eq!(result.len(), 1, "Should emit single warning for nested fence issue");
588        assert!(result[0].message.contains("line 4")); // The nested ``` is on line 4
589
590        let fixed = run_fix(content).unwrap();
591        assert!(fixed.starts_with("````markdown"), "Should fix with longer fence");
592    }
593
594    #[test]
595    fn test_empty_code_block() {
596        let content = "```markdown\n```\n";
597        let result = run_check(content).unwrap();
598        assert!(result.is_empty(), "Empty code block should not trigger");
599    }
600
601    #[test]
602    fn test_multiple_code_blocks() {
603        // The markdown block has a collision (inner ```python closes it prematurely).
604        // The orphan closing fence (line 9) is NOT treated as a new opening fence
605        // because the context correctly detects it as part of the markdown block.
606        let content = r#"```python
607safe code
608```
609
610```markdown
611```python
612collision
613```
614```
615
616```javascript
617also safe
618```
619"#;
620        let result = run_check(content).unwrap();
621        // Only 1 warning for the markdown block collision.
622        // The orphan fence is correctly ignored (not parsed as new opening fence).
623        assert_eq!(result.len(), 1, "Should emit single warning for collision");
624        assert!(result[0].message.contains("line 6")); // The nested ```python is on line 6
625    }
626
627    #[test]
628    fn test_single_collision_properly_closed() {
629        // When the outer fence is properly longer, only the intended block triggers
630        let content = r#"```python
631safe code
632```
633
634````markdown
635```python
636collision
637```
638````
639
640```javascript
641also safe
642```
643"#;
644        let result = run_check(content).unwrap();
645        assert!(result.is_empty(), "Properly fenced blocks should not trigger");
646    }
647
648    #[test]
649    fn test_indented_code_block_in_list() {
650        let content = r#"- List item
651  ```markdown
652  ```python
653  nested
654  ```
655  ```
656"#;
657        let result = run_check(content).unwrap();
658        assert_eq!(result.len(), 1, "Should detect collision in indented block");
659        assert!(result[0].message.contains("````"));
660    }
661
662    #[test]
663    fn test_no_false_positive_list_indented_block() {
664        // 4-space indented code blocks in list context (GFM extension) should not
665        // cause false positives. The closing fence with 3-space indent should not
666        // be parsed as a new opening fence.
667        let content = r#"1. List item with code:
668
669    ```json
670    {"key": "value"}
671    ```
672
6732. Another item
674
675   ```python
676   code()
677   ```
678"#;
679        let result = run_check(content).unwrap();
680        // No collision - these are separate, well-formed code blocks
681        assert!(
682            result.is_empty(),
683            "List-indented code blocks should not trigger false positives"
684        );
685    }
686
687    // ==================== Comprehensive Edge Case Tests ====================
688
689    #[test]
690    fn test_case_insensitive_language() {
691        // MARKDOWN, Markdown, MD should all be checked
692        for lang in ["MARKDOWN", "Markdown", "MD", "Md", "mD"] {
693            let content = format!("```{lang}\n```python\ncode()\n```\n```\n");
694            let result = run_check(&content).unwrap();
695            assert_eq!(result.len(), 1, "{lang} should be recognized as markdown");
696        }
697    }
698
699    #[test]
700    fn test_unclosed_outer_fence() {
701        // If outer fence is never closed, no collision can be detected
702        let content = "```markdown\n```python\ncode()\n```\n";
703        let result = run_check(content).unwrap();
704        // The outer fence finds ```python as its closing fence (premature close)
705        // Then ```\n at the end becomes orphan - but context would handle this
706        assert!(result.len() <= 1, "Unclosed fence should not cause issues");
707    }
708
709    #[test]
710    fn test_deeply_nested_fences() {
711        // Multiple levels of nesting require progressively longer fences
712        let content = r#"```markdown
713````markdown
714```python
715code()
716```
717````
718```
719"#;
720        let result = run_check(content).unwrap();
721        // The outer ``` sees ```` as collision (4 >= 3)
722        assert_eq!(result.len(), 1, "Deep nesting should trigger warning");
723        assert!(result[0].message.contains("`````")); // Needs 5 to be safe
724    }
725
726    #[test]
727    fn test_very_long_fences() {
728        // 10 backtick fences should work correctly
729        let content = "``````````markdown\n```python\ncode()\n```\n``````````\n";
730        let result = run_check(content).unwrap();
731        assert!(result.is_empty(), "Very long outer fence should not trigger warning");
732    }
733
734    #[test]
735    fn test_blockquote_with_fence() {
736        // Fences inside blockquotes (CommonMark allows this)
737        let content = "> ```markdown\n> ```python\n> code()\n> ```\n> ```\n";
738        let result = run_check(content).unwrap();
739        // Blockquote prefixes are part of the line, so parsing may differ
740        // This documents current behavior
741        assert!(result.is_empty() || result.len() == 1);
742    }
743
744    #[test]
745    fn test_fence_with_attributes() {
746        // Info string with attributes like {.class #id}
747        let content = "```markdown {.highlight #example}\n```python\ncode()\n```\n```\n";
748        let result = run_check(content).unwrap();
749        assert_eq!(
750            result.len(),
751            1,
752            "Attributes in info string should not prevent detection"
753        );
754
755        let fixed = run_fix(content).unwrap();
756        assert!(
757            fixed.contains("````markdown {.highlight #example}"),
758            "Attributes should be preserved in fix"
759        );
760    }
761
762    #[test]
763    fn test_trailing_whitespace_in_info_string() {
764        let content = "```markdown   \n```python\ncode()\n```\n```\n";
765        let result = run_check(content).unwrap();
766        assert_eq!(result.len(), 1, "Trailing whitespace should not affect detection");
767    }
768
769    #[test]
770    fn test_only_closing_fence_pattern() {
771        // Content that has only closing fence patterns (no language)
772        let content = "```markdown\nsome text\n```\nmore text\n```\n";
773        let result = run_check(content).unwrap();
774        // The first ``` closes, second ``` is outside
775        assert!(result.is_empty(), "Properly closed block should not trigger");
776    }
777
778    #[test]
779    fn test_fence_at_end_of_file_no_newline() {
780        let content = "```markdown\n```python\ncode()\n```\n```";
781        let result = run_check(content).unwrap();
782        assert_eq!(result.len(), 1, "Should detect collision even without trailing newline");
783
784        let fixed = run_fix(content).unwrap();
785        assert!(!fixed.ends_with('\n'), "Should preserve lack of trailing newline");
786    }
787
788    #[test]
789    fn test_empty_lines_between_fences() {
790        let content = "```markdown\n\n\n```python\n\ncode()\n\n```\n\n```\n";
791        let result = run_check(content).unwrap();
792        assert_eq!(result.len(), 1, "Empty lines should not affect collision detection");
793    }
794
795    #[test]
796    fn test_tab_indented_opening_fence() {
797        // Tab at start of line - CommonMark says tab = 4 spaces for indentation.
798        // A 4-space indented fence is NOT a valid fenced code block per CommonMark
799        // (only 0-3 spaces allowed). However, our implementation counts characters,
800        // treating tab as 1 character. This means tab-indented fences ARE parsed.
801        // This is intentional: consistent with other rules in rumdl and matches
802        // common editor behavior where tab = 1 indent level.
803        let content = "\t```markdown\n```python\ncode()\n```\n```\n";
804        let result = run_check(content).unwrap();
805        // With tab treated as 1 char (< 3), this IS parsed as a fence and triggers collision
806        assert_eq!(result.len(), 1, "Tab-indented fence is parsed (tab = 1 char)");
807    }
808
809    #[test]
810    fn test_mixed_fence_types_no_collision() {
811        // Backticks outer, tildes inner - should never collide
812        let content = "```markdown\n~~~python\ncode()\n~~~\n```\n";
813        let result = run_check(content).unwrap();
814        assert!(result.is_empty(), "Different fence chars should not collide");
815
816        // Tildes outer, backticks inner
817        let content2 = "~~~markdown\n```python\ncode()\n```\n~~~\n";
818        let result2 = run_check(content2).unwrap();
819        assert!(result2.is_empty(), "Different fence chars should not collide");
820    }
821
822    #[test]
823    fn test_frontmatter_not_confused_with_fence() {
824        // YAML frontmatter uses --- which shouldn't be confused with fences
825        let content = "---\ntitle: Test\n---\n\n```markdown\n```python\ncode()\n```\n```\n";
826        let result = run_check(content).unwrap();
827        assert_eq!(result.len(), 1, "Should detect collision after frontmatter");
828    }
829
830    #[test]
831    fn test_html_comment_with_fence_inside() {
832        // Fences inside HTML comments should be ignored
833        let content = "<!-- ```markdown\n```python\ncode()\n``` -->\n\n```markdown\nreal content\n```\n";
834        let result = run_check(content).unwrap();
835        // The fences inside HTML comment should be skipped
836        assert!(result.is_empty(), "Fences in HTML comments should be ignored");
837    }
838
839    #[test]
840    fn test_consecutive_code_blocks() {
841        // Multiple consecutive markdown blocks, each with collision
842        let content = r#"```markdown
843```python
844a()
845```
846```
847
848```markdown
849```ruby
850b()
851```
852```
853"#;
854        let result = run_check(content).unwrap();
855        // Each markdown block has its own collision
856        assert!(!result.is_empty(), "Should detect collision in first block");
857    }
858
859    #[test]
860    fn test_numeric_info_string() {
861        // Numbers after fence - some parsers treat this differently
862        let content = "```123\n```456\ncode()\n```\n```\n";
863        let result = run_check(content).unwrap();
864        // "123" is not "markdown" or "md", so should not check
865        assert!(result.is_empty(), "Numeric info string is not markdown");
866    }
867
868    #[test]
869    fn test_collision_at_exact_length() {
870        // An empty ``` is the closing fence, not a collision.
871        // For a collision, the inner fence must have content that looks like an opening fence.
872        let content = "```markdown\n```python\ncode()\n```\n```\n";
873        let result = run_check(content).unwrap();
874        assert_eq!(
875            result.len(),
876            1,
877            "Same-length fence with language should trigger collision"
878        );
879
880        // Inner fence one shorter than outer - not a collision
881        let content2 = "````markdown\n```python\ncode()\n```\n````\n";
882        let result2 = run_check(content2).unwrap();
883        assert!(result2.is_empty(), "Shorter inner fence should not collide");
884
885        // Empty markdown block followed by another fence - not a collision
886        let content3 = "```markdown\n```\n";
887        let result3 = run_check(content3).unwrap();
888        assert!(result3.is_empty(), "Empty closing fence is not a collision");
889    }
890
891    #[test]
892    fn test_fix_preserves_content_exactly() {
893        // Fix should not modify the content between fences
894        let content = "```markdown\n```python\n  indented\n\ttabbed\nspecial: !@#$%\n```\n```\n";
895        let fixed = run_fix(content).unwrap();
896        assert!(fixed.contains("  indented"), "Indentation should be preserved");
897        assert!(fixed.contains("\ttabbed"), "Tabs should be preserved");
898        assert!(fixed.contains("special: !@#$%"), "Special chars should be preserved");
899    }
900
901    #[test]
902    fn test_warning_line_numbers_accurate() {
903        let content = "# Title\n\nParagraph\n\n```markdown\n```python\ncode()\n```\n```\n";
904        let result = run_check(content).unwrap();
905        assert_eq!(result.len(), 1);
906        assert_eq!(result[0].line, 5, "Warning should be on opening fence line");
907        assert!(result[0].message.contains("line 6"), "Collision line should be line 6");
908    }
909
910    #[test]
911    fn test_should_skip_optimization() {
912        let rule = MD070NestedCodeFence::new();
913
914        // No code-like content
915        let ctx1 = LintContext::new("Just plain text", crate::config::MarkdownFlavor::Standard, None);
916        assert!(
917            rule.should_skip(&ctx1),
918            "Should skip content without backticks or tildes"
919        );
920
921        // Has backticks
922        let ctx2 = LintContext::new("Has `code`", crate::config::MarkdownFlavor::Standard, None);
923        assert!(!rule.should_skip(&ctx2), "Should not skip content with backticks");
924
925        // Has tildes
926        let ctx3 = LintContext::new("Has ~~~", crate::config::MarkdownFlavor::Standard, None);
927        assert!(!rule.should_skip(&ctx3), "Should not skip content with tildes");
928
929        // Empty
930        let ctx4 = LintContext::new("", crate::config::MarkdownFlavor::Standard, None);
931        assert!(rule.should_skip(&ctx4), "Should skip empty content");
932    }
933
934    #[test]
935    fn test_python_triplestring_fence_collision_fix() {
936        // Reproduces GitHub issue #518: Python triple-quoted strings with embedded
937        // markdown cause premature fence closure
938        let content = "# Test\n\n```python\ndef f():\n    text = \"\"\"\n```json\n{}\n```\n\"\"\"\n```\n";
939        let result = run_check(content).unwrap();
940        assert_eq!(result.len(), 1, "Should detect collision in python block");
941        assert!(result[0].fix.is_some(), "Warning should be marked as fixable");
942
943        let fixed = run_fix(content).unwrap();
944        assert!(
945            fixed.contains("````python"),
946            "Should upgrade opening fence to 4 backticks"
947        );
948        assert!(
949            fixed.contains("````\n") || fixed.ends_with("````"),
950            "Should upgrade closing fence to 4 backticks"
951        );
952        // Content between fences should be preserved
953        assert!(fixed.contains("```json"), "Inner fences should be preserved as content");
954    }
955
956    #[test]
957    fn test_warning_is_fixable() {
958        // All MD070 warnings must have fix.is_some() so the fix coordinator calls fix()
959        let content = "```markdown\n```python\ncode()\n```\n```\n";
960        let result = run_check(content).unwrap();
961        assert_eq!(result.len(), 1);
962        assert!(
963            result[0].fix.is_some(),
964            "MD070 warnings must be marked fixable for the fix coordinator"
965        );
966    }
967
968    #[test]
969    fn test_fix_via_warning_struct_is_safe() {
970        // The Fix on warnings is used directly by the LSP code action path.
971        // It must produce valid output (not delete the fence or corrupt the file).
972        let content = "```markdown\n```python\ncode()\n```\n```\n";
973        let result = run_check(content).unwrap();
974        assert_eq!(result.len(), 1);
975
976        let fix = result[0].fix.as_ref().unwrap();
977        // Apply the Fix directly (simulating LSP path)
978        let mut fixed = String::new();
979        fixed.push_str(&content[..fix.range.start]);
980        fixed.push_str(&fix.replacement);
981        fixed.push_str(&content[fix.range.end..]);
982
983        // The fixed content should have upgraded fences
984        assert!(
985            fixed.contains("````markdown"),
986            "Direct Fix application should upgrade opening fence, got: {fixed}"
987        );
988        assert!(
989            fixed.contains("````\n") || fixed.ends_with("````"),
990            "Direct Fix application should upgrade closing fence, got: {fixed}"
991        );
992        // Content should be preserved
993        assert!(
994            fixed.contains("```python"),
995            "Inner content should be preserved, got: {fixed}"
996        );
997    }
998
999    #[test]
1000    fn test_fix_via_warning_struct_python_block() {
1001        // Test the LSP code action path for a Python block where CommonMark's
1002        // closing fence differs from the user's intended closing fence.
1003        // CommonMark sees: ```python (line 1) closed by bare ``` (line 6).
1004        // User intended: ```python (line 1) closed by ``` (line 10).
1005        let content = "```python\ndef f():\n    text = \"\"\"\n```json\n{}\n```\n\"\"\"\n    print(text)\nf()\n```\n";
1006        let result = run_check(content).unwrap();
1007        assert_eq!(result.len(), 1);
1008
1009        let fix = result[0].fix.as_ref().unwrap();
1010        let mut fixed = String::new();
1011        fixed.push_str(&content[..fix.range.start]);
1012        fixed.push_str(&fix.replacement);
1013        fixed.push_str(&content[fix.range.end..]);
1014
1015        // The Fix must cover the full intended block (lines 1-10), not just
1016        // the CommonMark-visible block (lines 1-6). Verify the fixed content
1017        // has one code block containing ALL the Python code.
1018        assert!(
1019            fixed.starts_with("````python\n"),
1020            "Should upgrade opening fence, got:\n{fixed}"
1021        );
1022        assert!(
1023            fixed.contains("````\n") || fixed.trim_end().ends_with("````"),
1024            "Should upgrade closing fence, got:\n{fixed}"
1025        );
1026        // ALL Python code must be between the fences
1027        let fence_start = fixed.find("````python\n").unwrap();
1028        let after_open = fence_start + "````python\n".len();
1029        let close_pos = fixed[after_open..]
1030            .find("\n````\n")
1031            .or_else(|| fixed[after_open..].find("\n````"));
1032        assert!(
1033            close_pos.is_some(),
1034            "Should have closing fence after content, got:\n{fixed}"
1035        );
1036        let block_content = &fixed[after_open..after_open + close_pos.unwrap()];
1037        assert!(
1038            block_content.contains("print(text)"),
1039            "print(text) must be inside the code block, got block:\n{block_content}"
1040        );
1041        assert!(
1042            block_content.contains("f()"),
1043            "f() must be inside the code block, got block:\n{block_content}"
1044        );
1045        assert!(
1046            block_content.contains("```json"),
1047            "Inner fences must be preserved as content, got block:\n{block_content}"
1048        );
1049    }
1050
1051    #[test]
1052    fn test_fix_via_apply_warning_fixes() {
1053        // End-to-end test of the LSP fix path using apply_warning_fixes
1054        let content = "```markdown\n```python\ncode()\n```\n```\n";
1055        let result = run_check(content).unwrap();
1056        assert_eq!(result.len(), 1);
1057
1058        let fixed = crate::utils::fix_utils::apply_warning_fixes(content, &result).unwrap();
1059        assert!(
1060            fixed.contains("````markdown"),
1061            "apply_warning_fixes should upgrade opening fence"
1062        );
1063        assert!(
1064            fixed.contains("````\n") || fixed.ends_with("````"),
1065            "apply_warning_fixes should upgrade closing fence"
1066        );
1067
1068        // Re-check should find no issues
1069        let ctx2 = LintContext::new(&fixed, crate::config::MarkdownFlavor::Standard, None);
1070        let rule = MD070NestedCodeFence::new();
1071        let result2 = rule.check(&ctx2).unwrap();
1072        assert!(
1073            result2.is_empty(),
1074            "Re-check after LSP fix should find no issues, got: {:?}",
1075            result2.iter().map(|w| &w.message).collect::<Vec<_>>()
1076        );
1077    }
1078
1079    /// Helper: run fix() then check() on the result, asserting 0 violations remain
1080    fn assert_fix_roundtrip(content: &str, label: &str) {
1081        let fixed = run_fix(content).unwrap();
1082        let rule = MD070NestedCodeFence::new();
1083        let ctx = LintContext::new(&fixed, crate::config::MarkdownFlavor::Standard, None);
1084        let remaining = rule.check(&ctx).unwrap();
1085        assert!(
1086            remaining.is_empty(),
1087            "[{label}] fix() should resolve all violations, but {n} remain: {msgs:?}\nFixed content:\n{fixed}",
1088            n = remaining.len(),
1089            msgs = remaining.iter().map(|w| &w.message).collect::<Vec<_>>(),
1090        );
1091    }
1092
1093    #[test]
1094    fn test_fix_roundtrip_basic() {
1095        assert_fix_roundtrip("```markdown\n```python\ncode()\n```\n```\n", "basic collision");
1096    }
1097
1098    #[test]
1099    fn test_fix_roundtrip_longer_inner_fence() {
1100        assert_fix_roundtrip("```markdown\n`````python\ncode()\n`````\n```\n", "longer inner fence");
1101    }
1102
1103    #[test]
1104    fn test_fix_roundtrip_tilde_collision() {
1105        assert_fix_roundtrip("~~~markdown\n~~~python\ncode()\n~~~\n~~~\n", "tilde collision");
1106    }
1107
1108    #[test]
1109    fn test_fix_roundtrip_info_string_attrs() {
1110        assert_fix_roundtrip(
1111            "```markdown {.highlight}\n```python\ncode()\n```\n```\n",
1112            "info string with attrs",
1113        );
1114    }
1115
1116    #[test]
1117    fn test_fix_roundtrip_no_trailing_newline() {
1118        assert_fix_roundtrip("```markdown\n```python\ncode()\n```\n```", "no trailing newline");
1119    }
1120
1121    #[test]
1122    fn test_fix_roundtrip_python_triple_string() {
1123        assert_fix_roundtrip(
1124            "# Test\n\n```python\ndef f():\n    text = \"\"\"\n```json\n{}\n```\n\"\"\"\n```\n",
1125            "python triple string",
1126        );
1127    }
1128
1129    #[test]
1130    fn test_fix_roundtrip_deeply_nested() {
1131        assert_fix_roundtrip(
1132            "```markdown\n````markdown\n```python\ncode()\n```\n````\n```\n",
1133            "deeply nested fences",
1134        );
1135    }
1136
1137    #[test]
1138    fn test_fix_roundtrip_real_world_docs() {
1139        let content = r#"```markdown
11401. First item
1141
1142   ```python
1143   code_in_list()
1144   ```
1145
11461. Second item
1147
1148```
1149"#;
1150        assert_fix_roundtrip(content, "real world docs case");
1151    }
1152
1153    #[test]
1154    fn test_fix_roundtrip_empty_lines() {
1155        assert_fix_roundtrip(
1156            "```markdown\n\n\n```python\n\ncode()\n\n```\n\n```\n",
1157            "empty lines between fences",
1158        );
1159    }
1160
1161    #[test]
1162    fn test_fix_no_change_when_no_violations() {
1163        let content = "````markdown\n```python\ncode()\n```\n````\n";
1164        let fixed = run_fix(content).unwrap();
1165        assert_eq!(fixed, content, "fix() should not modify content with no violations");
1166    }
1167
1168    #[test]
1169    fn test_fix_roundtrip_consecutive_collisions() {
1170        let content = r#"```markdown
1171```python
1172a()
1173```
1174```
1175
1176```md
1177```ruby
1178b()
1179```
1180```
1181"#;
1182        // Fix and verify each collision is resolved
1183        let fixed = run_fix(content).unwrap();
1184        let rule = MD070NestedCodeFence::new();
1185        let ctx = LintContext::new(&fixed, crate::config::MarkdownFlavor::Standard, None);
1186        let remaining = rule.check(&ctx).unwrap();
1187        // At minimum the first block should be fixed; consecutive blocks may
1188        // require multiple passes but the first pass must not make things worse
1189        assert!(
1190            remaining.len() < 2,
1191            "fix() should resolve at least one collision, remaining: {remaining:?}",
1192        );
1193    }
1194}
rumdl_lib/rules/md070_nested_code_fence.rs

rumdl_lib/rules/
md070_nested_code_fence.rs