rumdl_lib/rules/
md070_nested_code_fence.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2
/// Rule MD070: Nested code fence collision detection
///
/// Detects when a fenced code block contains fence markers that would cause
/// premature closure. Suggests using longer fences to avoid this issue.
///
/// Checks languages where triple backtick sequences commonly appear:
/// markdown, Python, JavaScript, shell, Rust, Go, and others with multiline
/// strings, heredocs, template literals, or doc comments.
///
/// See [docs/md070.md](../../docs/md070.md) for full documentation.
#[derive(Clone, Default)]
pub struct MD070NestedCodeFence;

impl MD070NestedCodeFence {
    /// Creates the rule. The struct carries no state, so this is equivalent
    /// to `Default::default()`.
    pub fn new() -> Self {
        Self
    }

    /// Check if the given language should be checked for nested fences.
    ///
    /// Covers languages where triple backtick sequences commonly appear in source:
    /// multiline strings with embedded markdown, heredocs, doc comments, template
    /// literals, and data formats with multiline string values.
    ///
    /// `lang` may be a whole info string: only its first whitespace-separated
    /// token is considered, compared ASCII case-insensitively. An empty
    /// language (a bare fence) is checked as well.
    fn should_check_language(lang: &str) -> bool {
        let base = lang.split_whitespace().next().unwrap_or("");
        matches!(
            base.to_ascii_lowercase().as_str(),
            // Documentation / markup
            ""
                | "markdown"
                | "md"
                | "mdx"
                | "text"
                | "txt"
                | "plain"
                // Multiline strings / docstrings
                | "python"
                | "py"
                | "ruby"
                | "rb"
                | "perl"
                | "pl"
                | "php"
                | "lua"
                | "r"
                | "rmd"
                | "rmarkdown"
                // Template literals / raw strings
                | "javascript"
                | "js"
                | "jsx"
                | "mjs"
                | "cjs"
                | "typescript"
                | "ts"
                | "tsx"
                | "mts"
                | "cts"
                | "rust"
                | "rs"
                | "go"
                | "golang"
                | "swift"
                | "kotlin"
                | "kt"
                | "kts"
                | "java"
                | "csharp"
                | "cs"
                | "c#"
                | "scala"
                // Shell heredocs
                | "shell"
                | "sh"
                | "bash"
                | "zsh"
                | "fish"
                | "powershell"
                | "ps1"
                | "pwsh"
                // Data / config formats
                | "yaml"
                | "yml"
                | "toml"
                | "json"
                | "jsonc"
                | "json5"
                // Template engines
                | "jinja"
                | "jinja2"
                | "handlebars"
                | "hbs"
                | "liquid"
                | "nunjucks"
                | "njk"
                | "ejs"
                // Terminal output
                | "console"
                | "terminal"
        )
    }

    /// Returns the length of the leading run of `fence_char` when `line`
    /// (ignoring leading whitespace) looks like a fence marker, `None` otherwise.
    ///
    /// A line looks like a fence marker when the run of fence characters is
    /// followed by one of:
    /// - nothing, or only whitespace (a closing fence or a bare opening fence);
    /// - optional whitespace and then an alphabetic character or `{`
    ///   (an opening fence with a language or an attribute block).
    ///
    /// Anything else (digits, punctuation, ...) is treated as ordinary content.
    /// The run length itself is not validated here; callers apply their own
    /// minimum.
    fn fence_like_run(line: &str, fence_char: char) -> Option<usize> {
        let trimmed = line.trim_start();
        let count = trimmed.chars().take_while(|&c| c == fence_char).count();
        if count == 0 {
            return None;
        }

        // `count` is a number of chars; convert to a byte offset before slicing
        // so a multi-byte `fence_char` cannot split a code point.
        let after_fence = trimmed[count * fence_char.len_utf8()..].trim_start();
        let fence_like = match after_fence.chars().next() {
            None => true,
            Some(c) => c.is_alphabetic() || c == '{',
        };
        fence_like.then_some(count)
    }

    /// Find the first line in `content` that collides with the outer fence.
    ///
    /// A collision is a fence-like line (see [`Self::fence_like_run`]) made of
    /// the same fence character and at least `outer_fence_length` long.
    ///
    /// Returns `(line_offset, fence_length)` of the first collision, where
    /// `line_offset` is the 0-based line index within `content`, or `None`
    /// when no line collides.
    fn find_fence_collision(content: &str, fence_char: char, outer_fence_length: usize) -> Option<(usize, usize)> {
        content.lines().enumerate().find_map(|(line_idx, line)| {
            Self::fence_like_run(line, fence_char)
                .filter(|&count| count >= outer_fence_length)
                .map(|count| (line_idx, count))
        })
    }

    /// Find the longest fence-like run of `fence_char` (3 or more characters)
    /// in `content`, or 0 when there is none. An outer fence must be strictly
    /// longer than this value to safely contain the content.
    fn find_safe_fence_length(content: &str, fence_char: char) -> usize {
        content
            .lines()
            .filter_map(|line| Self::fence_like_run(line, fence_char))
            .filter(|&count| count >= 3)
            .max()
            .unwrap_or(0)
    }

    /// Find the user's intended closing fence when a collision is detected.
    ///
    /// Searches past the first (premature) closing fence at index `first_close`
    /// for the last bare fence of the same type, stopping at the first line that
    /// opens a new fenced block: same fence character, a non-empty info string,
    /// and indentation no deeper than `opening_indent`.
    ///
    /// Returns the index into `lines` of the intended close, which is
    /// `first_close` itself when no later bare fence is found.
    fn find_intended_close(
        lines: &[&str],
        first_close: usize,
        fence_char: char,
        fence_length: usize,
        opening_indent: usize,
    ) -> usize {
        let mut intended_close = first_close;
        for (j, line_j) in lines.iter().enumerate().skip(first_close + 1) {
            if Self::is_closing_fence(line_j, fence_char, fence_length) {
                intended_close = j;
            } else if Self::parse_fence_line(line_j)
                .is_some_and(|(ind, ch, _, info)| ind <= opening_indent && ch == fence_char && !info.is_empty())
            {
                break;
            }
        }
        intended_close
    }

    /// Parse a fence marker from a line, returning
    /// `(indent, fence_char, fence_length, info_string)`.
    ///
    /// `indent` is the byte length of the leading whitespace and `info_string`
    /// is the text after the fence run with surrounding whitespace trimmed.
    /// Returns `None` when the line is indented by more than 3 whitespace
    /// characters, does not start with at least three backticks or tildes, or
    /// is a backtick run whose info string contains a backtick (per CommonMark
    /// that is an inline code span, not a fence).
    fn parse_fence_line(line: &str) -> Option<(usize, char, usize, &str)> {
        let trimmed = line.trim_start();
        let indent = line.len() - trimmed.len();
        // Per CommonMark, fence must have 0-3 spaces of indentation
        if indent > 3 {
            return None;
        }

        let fence_char = trimmed.chars().next().filter(|c| matches!(*c, '`' | '~'))?;
        let count = trimmed.chars().take_while(|&c| c == fence_char).count();
        if count < 3 {
            return None;
        }

        // Both fence characters are ASCII, so the char count is a valid byte offset.
        let info = trimmed[count..].trim();
        if fence_char == '`' && info.contains('`') {
            return None;
        }

        Some((indent, fence_char, count, info))
    }

    /// Check if a line is a valid closing fence for the given opening fence.
    ///
    /// The line must be indented by at most 3 whitespace characters (per
    /// CommonMark, regardless of the opening fence's indentation), start with
    /// at least `min_length` repetitions of `fence_char`, and contain nothing
    /// but whitespace afterwards.
    fn is_closing_fence(line: &str, fence_char: char, min_length: usize) -> bool {
        let trimmed = line.trim_start();
        // Per CommonMark spec, closing fence can have 0-3 spaces of indentation
        if line.len() - trimmed.len() > 3 {
            return false;
        }

        let count = trimmed.chars().take_while(|&c| c == fence_char).count();
        // `count == 0` means the line does not start with the fence character at all.
        if count == 0 || count < min_length {
            return false;
        }

        // Closing fence must have only whitespace after fence chars
        trimmed[count * fence_char.len_utf8()..].trim().is_empty()
    }
}
237
238impl Rule for MD070NestedCodeFence {
239    fn name(&self) -> &'static str {
240        "MD070"
241    }
242
243    fn description(&self) -> &'static str {
244        "Nested code fence collision - use longer fence to avoid premature closure"
245    }
246
247    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
248        let mut warnings = Vec::new();
249        let lines = ctx.raw_lines();
250
251        let mut i = 0;
252        while i < lines.len() {
253            // Skip lines in contexts that shouldn't be processed
254            if let Some(line_info) = ctx.lines.get(i)
255                && (line_info.in_front_matter
256                    || line_info.in_html_comment
257                    || line_info.in_mdx_comment
258                    || line_info.in_html_block)
259            {
260                i += 1;
261                continue;
262            }
263
264            // Skip if we're already inside a code block (check previous line).
265            // This handles list-indented code blocks (4+ spaces) which our rule doesn't
266            // parse directly, but the context detects correctly. If the previous line
267            // is in a code block, this line is either content or a closing fence for
268            // that block - not a new opening fence.
269            if i > 0
270                && let Some(prev_line_info) = ctx.lines.get(i - 1)
271                && prev_line_info.in_code_block
272            {
273                i += 1;
274                continue;
275            }
276
277            let line = lines[i];
278
279            // Try to parse as opening fence
280            if let Some((_indent, fence_char, fence_length, info_string)) = Self::parse_fence_line(line) {
281                let block_start = i;
282
283                // Extract the language (first word of info string)
284                let language = info_string.split_whitespace().next().unwrap_or("");
285
286                // Find the closing fence
287                let mut block_end = None;
288                for (j, line_j) in lines.iter().enumerate().skip(i + 1) {
289                    if Self::is_closing_fence(line_j, fence_char, fence_length) {
290                        block_end = Some(j);
291                        break;
292                    }
293                }
294
295                if let Some(end_line) = block_end {
296                    // We have a complete code block from block_start to end_line
297                    // Check if we should analyze this block
298                    if Self::should_check_language(language) {
299                        // Get the content between fences
300                        let block_content: String = if block_start + 1 < end_line {
301                            lines[(block_start + 1)..end_line].join("\n")
302                        } else {
303                            String::new()
304                        };
305
306                        // Check for fence collision
307                        if let Some((collision_line_offset, _collision_length)) =
308                            Self::find_fence_collision(&block_content, fence_char, fence_length)
309                        {
310                            let collision_line_num = block_start + 1 + collision_line_offset + 1; // 1-indexed
311
312                            // Find the user's intended closing fence (may be past the
313                            // CommonMark-visible close when inner ``` causes premature closure)
314                            let indent = line.len() - line.trim_start().len();
315                            let intended_close =
316                                Self::find_intended_close(lines, end_line, fence_char, fence_length, indent);
317
318                            // Compute safe fence length from the full intended content
319                            let full_content: String = if block_start + 1 < intended_close {
320                                lines[(block_start + 1)..intended_close].join("\n")
321                            } else {
322                                block_content.clone()
323                            };
324                            let safe_length = Self::find_safe_fence_length(&full_content, fence_char) + 1;
325                            let suggested_fence: String = std::iter::repeat_n(fence_char, safe_length).collect();
326
327                            // Build a Fix that replaces the block from opening fence
328                            // through the intended closing fence. This must be safe for
329                            // direct application by the LSP code action path.
330                            let open_byte_start = ctx.line_index.get_line_start_byte(block_start + 1).unwrap_or(0);
331                            let close_byte_end = ctx
332                                .line_index
333                                .get_line_start_byte(intended_close + 2)
334                                .unwrap_or(ctx.content.len());
335
336                            let indent_str = &line[..indent];
337                            let closing_line = lines[intended_close];
338                            let closing_indent = &closing_line[..closing_line.len() - closing_line.trim_start().len()];
339                            let mut replacement = format!("{indent_str}{suggested_fence}");
340                            if !info_string.is_empty() {
341                                replacement.push_str(info_string);
342                            }
343                            replacement.push('\n');
344                            for content_line in &lines[(block_start + 1)..intended_close] {
345                                replacement.push_str(content_line);
346                                replacement.push('\n');
347                            }
348                            replacement.push_str(closing_indent);
349                            replacement.push_str(&suggested_fence);
350                            // Only add trailing newline if the replaced range ends with one
351                            if close_byte_end <= ctx.content.len() && ctx.content[..close_byte_end].ends_with('\n') {
352                                replacement.push('\n');
353                            }
354
355                            warnings.push(LintWarning {
356                                rule_name: Some(self.name().to_string()),
357                                message: format!(
358                                    "Code block contains fence markers at line {collision_line_num} that interfere with block parsing — use {suggested_fence} for outer fence"
359                                ),
360                                line: block_start + 1,
361                                column: 1,
362                                end_line: intended_close + 1,
363                                end_column: lines[intended_close].len() + 1,
364                                severity: Severity::Warning,
365                                fix: Some(Fix {
366                                    range: (open_byte_start..close_byte_end),
367                                    replacement,
368                                }),
369                            });
370                        }
371                    }
372
373                    // Move past this code block
374                    i = end_line + 1;
375                    continue;
376                }
377            }
378
379            i += 1;
380        }
381
382        Ok(warnings)
383    }
384
385    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
386        if self.should_skip(ctx) {
387            return Ok(ctx.content.to_string());
388        }
389        let warnings = self.check(ctx)?;
390        if warnings.is_empty() {
391            return Ok(ctx.content.to_string());
392        }
393        let warnings =
394            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
395        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::FixFailed)
396    }
397
398    fn category(&self) -> RuleCategory {
399        RuleCategory::CodeBlock
400    }
401
402    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
403        ctx.content.is_empty() || (!ctx.likely_has_code() && !ctx.has_char('~'))
404    }
405
406    fn as_any(&self) -> &dyn std::any::Any {
407        self
408    }
409
410    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
411    where
412        Self: Sized,
413    {
414        Box::new(MD070NestedCodeFence::new())
415    }
416}
417
418#[cfg(test)]
419mod tests {
420    use super::*;
421    use crate::lint_context::LintContext;
422
423    fn run_check(content: &str) -> LintResult {
424        let rule = MD070NestedCodeFence::new();
425        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
426        rule.check(&ctx)
427    }
428
429    fn run_fix(content: &str) -> Result<String, LintError> {
430        let rule = MD070NestedCodeFence::new();
431        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
432        rule.fix(&ctx)
433    }
434
435    #[test]
436    fn test_no_collision_simple() {
437        let content = "```python\nprint('hello')\n```\n";
438        let result = run_check(content).unwrap();
439        assert!(result.is_empty(), "Simple code block should not trigger warning");
440    }
441
442    #[test]
443    fn test_no_collision_unchecked_language() {
444        // C is not checked for nested fences (triple backticks don't appear in C source)
445        let content = "```c\n```bash\necho hello\n```\n```\n";
446        let result = run_check(content).unwrap();
447        assert!(result.is_empty(), "Unchecked language should not trigger");
448    }
449
450    #[test]
451    fn test_collision_python_language() {
452        // Python is checked — triple-quoted strings commonly contain markdown
453        let content = "```python\n```json\n{}\n```\n```\n";
454        let result = run_check(content).unwrap();
455        assert_eq!(result.len(), 1, "Python should be checked for nested fences");
456        assert!(result[0].message.contains("````"));
457    }
458
459    #[test]
460    fn test_collision_javascript_language() {
461        let content = "```javascript\n```html\n<div></div>\n```\n```\n";
462        let result = run_check(content).unwrap();
463        assert_eq!(result.len(), 1, "JavaScript should be checked for nested fences");
464    }
465
466    #[test]
467    fn test_collision_shell_language() {
468        let content = "```bash\n```yaml\nkey: val\n```\n```\n";
469        let result = run_check(content).unwrap();
470        assert_eq!(result.len(), 1, "Shell should be checked for nested fences");
471    }
472
473    #[test]
474    fn test_collision_rust_language() {
475        let content = "```rust\n```toml\n[dep]\n```\n```\n";
476        let result = run_check(content).unwrap();
477        assert_eq!(result.len(), 1, "Rust should be checked for nested fences");
478    }
479
480    #[test]
481    fn test_no_collision_assembly_language() {
482        // Assembly, C, SQL etc. should NOT be checked
483        for lang in ["asm", "c", "cpp", "sql", "css", "fortran"] {
484            let content = format!("```{lang}\n```inner\ncontent\n```\n```\n");
485            let result = run_check(&content).unwrap();
486            assert!(result.is_empty(), "{lang} should not be checked for nested fences");
487        }
488    }
489
490    #[test]
491    fn test_collision_markdown_language() {
492        let content = "```markdown\n```python\ncode()\n```\n```\n";
493        let result = run_check(content).unwrap();
494        assert_eq!(result.len(), 1, "Should emit single warning for collision");
495        assert!(result[0].message.contains("fence markers at line"));
496        assert!(result[0].message.contains("interfere with block parsing"));
497        assert!(result[0].message.contains("use ````"));
498    }
499
500    #[test]
501    fn test_collision_empty_language() {
502        // Empty language (no language specified) is checked
503        let content = "```\n```python\ncode()\n```\n```\n";
504        let result = run_check(content).unwrap();
505        assert_eq!(result.len(), 1, "Empty language should be checked");
506    }
507
508    #[test]
509    fn test_no_collision_longer_outer_fence() {
510        let content = "````markdown\n```python\ncode()\n```\n````\n";
511        let result = run_check(content).unwrap();
512        assert!(result.is_empty(), "Longer outer fence should not trigger warning");
513    }
514
515    #[test]
516    fn test_tilde_fence_ignores_backticks() {
517        // Tildes and backticks don't conflict
518        let content = "~~~markdown\n```python\ncode()\n```\n~~~\n";
519        let result = run_check(content).unwrap();
520        assert!(result.is_empty(), "Different fence types should not collide");
521    }
522
523    #[test]
524    fn test_tilde_collision() {
525        let content = "~~~markdown\n~~~python\ncode()\n~~~\n~~~\n";
526        let result = run_check(content).unwrap();
527        assert_eq!(result.len(), 1, "Same fence type should collide");
528        assert!(result[0].message.contains("~~~~"));
529    }
530
531    #[test]
532    fn test_fix_increases_fence_length() {
533        let content = "```markdown\n```python\ncode()\n```\n```\n";
534        let fixed = run_fix(content).unwrap();
535        assert!(fixed.starts_with("````markdown"), "Should increase to 4 backticks");
536        assert!(
537            fixed.contains("````\n") || fixed.ends_with("````"),
538            "Closing should also be 4 backticks"
539        );
540    }
541
542    #[test]
543    fn test_fix_handles_longer_inner_fence() {
544        // Inner fence has 5 backticks, so outer needs 6
545        let content = "```markdown\n`````python\ncode()\n`````\n```\n";
546        let fixed = run_fix(content).unwrap();
547        assert!(fixed.starts_with("``````markdown"), "Should increase to 6 backticks");
548    }
549
550    #[test]
551    fn test_backticks_in_code_not_fence() {
552        // Template literals in JS shouldn't trigger
553        let content = "```markdown\nconst x = `template`;\n```\n";
554        let result = run_check(content).unwrap();
555        assert!(result.is_empty(), "Inline backticks should not be detected as fences");
556    }
557
558    #[test]
559    fn test_preserves_info_string() {
560        let content = "```markdown {.highlight}\n```python\ncode()\n```\n```\n";
561        let fixed = run_fix(content).unwrap();
562        assert!(
563            fixed.contains("````markdown {.highlight}"),
564            "Should preserve info string attributes"
565        );
566    }
567
568    #[test]
569    fn test_md_language_alias() {
570        let content = "```md\n```python\ncode()\n```\n```\n";
571        let result = run_check(content).unwrap();
572        assert_eq!(result.len(), 1, "md should be recognized as markdown");
573    }
574
575    #[test]
576    fn test_real_world_docs_case() {
577        // This is the actual pattern from docs/md031.md that triggered the PR
578        let content = r#"```markdown
5791. First item
580
581   ```python
582   code_in_list()
583   ```
584
5851. Second item
586
587```
588"#;
589        let result = run_check(content).unwrap();
590        assert_eq!(result.len(), 1, "Should emit single warning for nested fence issue");
591        assert!(result[0].message.contains("line 4")); // The nested ``` is on line 4
592
593        let fixed = run_fix(content).unwrap();
594        assert!(fixed.starts_with("````markdown"), "Should fix with longer fence");
595    }
596
597    #[test]
598    fn test_empty_code_block() {
599        let content = "```markdown\n```\n";
600        let result = run_check(content).unwrap();
601        assert!(result.is_empty(), "Empty code block should not trigger");
602    }
603
604    #[test]
605    fn test_multiple_code_blocks() {
606        // The markdown block has a collision (inner ```python closes it prematurely).
607        // The orphan closing fence (line 9) is NOT treated as a new opening fence
608        // because the context correctly detects it as part of the markdown block.
609        let content = r#"```python
610safe code
611```
612
613```markdown
614```python
615collision
616```
617```
618
619```javascript
620also safe
621```
622"#;
623        let result = run_check(content).unwrap();
624        // Only 1 warning for the markdown block collision.
625        // The orphan fence is correctly ignored (not parsed as new opening fence).
626        assert_eq!(result.len(), 1, "Should emit single warning for collision");
627        assert!(result[0].message.contains("line 6")); // The nested ```python is on line 6
628    }
629
630    #[test]
631    fn test_single_collision_properly_closed() {
632        // When the outer fence is properly longer, only the intended block triggers
633        let content = r#"```python
634safe code
635```
636
637````markdown
638```python
639collision
640```
641````
642
643```javascript
644also safe
645```
646"#;
647        let result = run_check(content).unwrap();
648        assert!(result.is_empty(), "Properly fenced blocks should not trigger");
649    }
650
651    #[test]
652    fn test_indented_code_block_in_list() {
653        let content = r#"- List item
654  ```markdown
655  ```python
656  nested
657  ```
658  ```
659"#;
660        let result = run_check(content).unwrap();
661        assert_eq!(result.len(), 1, "Should detect collision in indented block");
662        assert!(result[0].message.contains("````"));
663    }
664
665    #[test]
666    fn test_no_false_positive_list_indented_block() {
667        // 4-space indented code blocks in list context (GFM extension) should not
668        // cause false positives. The closing fence with 3-space indent should not
669        // be parsed as a new opening fence.
670        let content = r#"1. List item with code:
671
672    ```json
673    {"key": "value"}
674    ```
675
6762. Another item
677
678   ```python
679   code()
680   ```
681"#;
682        let result = run_check(content).unwrap();
683        // No collision - these are separate, well-formed code blocks
684        assert!(
685            result.is_empty(),
686            "List-indented code blocks should not trigger false positives"
687        );
688    }
689
690    // ==================== Comprehensive Edge Case Tests ====================
691
692    #[test]
693    fn test_case_insensitive_language() {
694        // MARKDOWN, Markdown, MD should all be checked
695        for lang in ["MARKDOWN", "Markdown", "MD", "Md", "mD"] {
696            let content = format!("```{lang}\n```python\ncode()\n```\n```\n");
697            let result = run_check(&content).unwrap();
698            assert_eq!(result.len(), 1, "{lang} should be recognized as markdown");
699        }
700    }
701
702    #[test]
703    fn test_unclosed_outer_fence() {
704        // If outer fence is never closed, no collision can be detected
705        let content = "```markdown\n```python\ncode()\n```\n";
706        let result = run_check(content).unwrap();
707        // The outer fence finds ```python as its closing fence (premature close)
708        // Then ```\n at the end becomes orphan - but context would handle this
709        assert!(result.len() <= 1, "Unclosed fence should not cause issues");
710    }
711
712    #[test]
713    fn test_deeply_nested_fences() {
714        // Multiple levels of nesting require progressively longer fences
715        let content = r#"```markdown
716````markdown
717```python
718code()
719```
720````
721```
722"#;
723        let result = run_check(content).unwrap();
724        // The outer ``` sees ```` as collision (4 >= 3)
725        assert_eq!(result.len(), 1, "Deep nesting should trigger warning");
726        assert!(result[0].message.contains("`````")); // Needs 5 to be safe
727    }
728
729    #[test]
730    fn test_very_long_fences() {
731        // 10 backtick fences should work correctly
732        let content = "``````````markdown\n```python\ncode()\n```\n``````````\n";
733        let result = run_check(content).unwrap();
734        assert!(result.is_empty(), "Very long outer fence should not trigger warning");
735    }
736
737    #[test]
738    fn test_blockquote_with_fence() {
739        // Fences inside blockquotes (CommonMark allows this)
740        let content = "> ```markdown\n> ```python\n> code()\n> ```\n> ```\n";
741        let result = run_check(content).unwrap();
742        // Blockquote prefixes are part of the line, so parsing may differ
743        // This documents current behavior
744        assert!(result.is_empty() || result.len() == 1);
745    }
746
747    #[test]
748    fn test_fence_with_attributes() {
749        // Info string with attributes like {.class #id}
750        let content = "```markdown {.highlight #example}\n```python\ncode()\n```\n```\n";
751        let result = run_check(content).unwrap();
752        assert_eq!(
753            result.len(),
754            1,
755            "Attributes in info string should not prevent detection"
756        );
757
758        let fixed = run_fix(content).unwrap();
759        assert!(
760            fixed.contains("````markdown {.highlight #example}"),
761            "Attributes should be preserved in fix"
762        );
763    }
764
765    #[test]
766    fn test_trailing_whitespace_in_info_string() {
767        let content = "```markdown   \n```python\ncode()\n```\n```\n";
768        let result = run_check(content).unwrap();
769        assert_eq!(result.len(), 1, "Trailing whitespace should not affect detection");
770    }
771
772    #[test]
773    fn test_only_closing_fence_pattern() {
774        // Content that has only closing fence patterns (no language)
775        let content = "```markdown\nsome text\n```\nmore text\n```\n";
776        let result = run_check(content).unwrap();
777        // The first ``` closes, second ``` is outside
778        assert!(result.is_empty(), "Properly closed block should not trigger");
779    }
780
781    #[test]
782    fn test_fence_at_end_of_file_no_newline() {
783        let content = "```markdown\n```python\ncode()\n```\n```";
784        let result = run_check(content).unwrap();
785        assert_eq!(result.len(), 1, "Should detect collision even without trailing newline");
786
787        let fixed = run_fix(content).unwrap();
788        assert!(!fixed.ends_with('\n'), "Should preserve lack of trailing newline");
789    }
790
791    #[test]
792    fn test_empty_lines_between_fences() {
793        let content = "```markdown\n\n\n```python\n\ncode()\n\n```\n\n```\n";
794        let result = run_check(content).unwrap();
795        assert_eq!(result.len(), 1, "Empty lines should not affect collision detection");
796    }
797
798    #[test]
799    fn test_tab_indented_opening_fence() {
800        // Tab at start of line - CommonMark says tab = 4 spaces for indentation.
801        // A 4-space indented fence is NOT a valid fenced code block per CommonMark
802        // (only 0-3 spaces allowed). However, our implementation counts characters,
803        // treating tab as 1 character. This means tab-indented fences ARE parsed.
804        // This is intentional: consistent with other rules in rumdl and matches
805        // common editor behavior where tab = 1 indent level.
806        let content = "\t```markdown\n```python\ncode()\n```\n```\n";
807        let result = run_check(content).unwrap();
808        // With tab treated as 1 char (< 3), this IS parsed as a fence and triggers collision
809        assert_eq!(result.len(), 1, "Tab-indented fence is parsed (tab = 1 char)");
810    }
811
812    #[test]
813    fn test_mixed_fence_types_no_collision() {
814        // Backticks outer, tildes inner - should never collide
815        let content = "```markdown\n~~~python\ncode()\n~~~\n```\n";
816        let result = run_check(content).unwrap();
817        assert!(result.is_empty(), "Different fence chars should not collide");
818
819        // Tildes outer, backticks inner
820        let content2 = "~~~markdown\n```python\ncode()\n```\n~~~\n";
821        let result2 = run_check(content2).unwrap();
822        assert!(result2.is_empty(), "Different fence chars should not collide");
823    }
824
825    #[test]
826    fn test_frontmatter_not_confused_with_fence() {
827        // YAML frontmatter uses --- which shouldn't be confused with fences
828        let content = "---\ntitle: Test\n---\n\n```markdown\n```python\ncode()\n```\n```\n";
829        let result = run_check(content).unwrap();
830        assert_eq!(result.len(), 1, "Should detect collision after frontmatter");
831    }
832
833    #[test]
834    fn test_html_comment_with_fence_inside() {
835        // Fences inside HTML comments should be ignored
836        let content = "<!-- ```markdown\n```python\ncode()\n``` -->\n\n```markdown\nreal content\n```\n";
837        let result = run_check(content).unwrap();
838        // The fences inside HTML comment should be skipped
839        assert!(result.is_empty(), "Fences in HTML comments should be ignored");
840    }
841
842    #[test]
843    fn test_consecutive_code_blocks() {
844        // Multiple consecutive markdown blocks, each with collision
845        let content = r#"```markdown
846```python
847a()
848```
849```
850
851```markdown
852```ruby
853b()
854```
855```
856"#;
857        let result = run_check(content).unwrap();
858        // Each markdown block has its own collision
859        assert!(!result.is_empty(), "Should detect collision in first block");
860    }
861
862    #[test]
863    fn test_numeric_info_string() {
864        // Numbers after fence - some parsers treat this differently
865        let content = "```123\n```456\ncode()\n```\n```\n";
866        let result = run_check(content).unwrap();
867        // "123" is not "markdown" or "md", so should not check
868        assert!(result.is_empty(), "Numeric info string is not markdown");
869    }
870
871    #[test]
872    fn test_collision_at_exact_length() {
873        // An empty ``` is the closing fence, not a collision.
874        // For a collision, the inner fence must have content that looks like an opening fence.
875        let content = "```markdown\n```python\ncode()\n```\n```\n";
876        let result = run_check(content).unwrap();
877        assert_eq!(
878            result.len(),
879            1,
880            "Same-length fence with language should trigger collision"
881        );
882
883        // Inner fence one shorter than outer - not a collision
884        let content2 = "````markdown\n```python\ncode()\n```\n````\n";
885        let result2 = run_check(content2).unwrap();
886        assert!(result2.is_empty(), "Shorter inner fence should not collide");
887
888        // Empty markdown block followed by another fence - not a collision
889        let content3 = "```markdown\n```\n";
890        let result3 = run_check(content3).unwrap();
891        assert!(result3.is_empty(), "Empty closing fence is not a collision");
892    }
893
894    #[test]
895    fn test_fix_preserves_content_exactly() {
896        // Fix should not modify the content between fences
897        let content = "```markdown\n```python\n  indented\n\ttabbed\nspecial: !@#$%\n```\n```\n";
898        let fixed = run_fix(content).unwrap();
899        assert!(fixed.contains("  indented"), "Indentation should be preserved");
900        assert!(fixed.contains("\ttabbed"), "Tabs should be preserved");
901        assert!(fixed.contains("special: !@#$%"), "Special chars should be preserved");
902    }
903
904    #[test]
905    fn test_warning_line_numbers_accurate() {
906        let content = "# Title\n\nParagraph\n\n```markdown\n```python\ncode()\n```\n```\n";
907        let result = run_check(content).unwrap();
908        assert_eq!(result.len(), 1);
909        assert_eq!(result[0].line, 5, "Warning should be on opening fence line");
910        assert!(result[0].message.contains("line 6"), "Collision line should be line 6");
911    }
912
913    #[test]
914    fn test_should_skip_optimization() {
915        let rule = MD070NestedCodeFence::new();
916
917        // No code-like content
918        let ctx1 = LintContext::new("Just plain text", crate::config::MarkdownFlavor::Standard, None);
919        assert!(
920            rule.should_skip(&ctx1),
921            "Should skip content without backticks or tildes"
922        );
923
924        // Has backticks
925        let ctx2 = LintContext::new("Has `code`", crate::config::MarkdownFlavor::Standard, None);
926        assert!(!rule.should_skip(&ctx2), "Should not skip content with backticks");
927
928        // Has tildes
929        let ctx3 = LintContext::new("Has ~~~", crate::config::MarkdownFlavor::Standard, None);
930        assert!(!rule.should_skip(&ctx3), "Should not skip content with tildes");
931
932        // Empty
933        let ctx4 = LintContext::new("", crate::config::MarkdownFlavor::Standard, None);
934        assert!(rule.should_skip(&ctx4), "Should skip empty content");
935    }
936
937    #[test]
938    fn test_python_triplestring_fence_collision_fix() {
939        // Reproduces GitHub issue #518: Python triple-quoted strings with embedded
940        // markdown cause premature fence closure
941        let content = "# Test\n\n```python\ndef f():\n    text = \"\"\"\n```json\n{}\n```\n\"\"\"\n```\n";
942        let result = run_check(content).unwrap();
943        assert_eq!(result.len(), 1, "Should detect collision in python block");
944        assert!(result[0].fix.is_some(), "Warning should be marked as fixable");
945
946        let fixed = run_fix(content).unwrap();
947        assert!(
948            fixed.contains("````python"),
949            "Should upgrade opening fence to 4 backticks"
950        );
951        assert!(
952            fixed.contains("````\n") || fixed.ends_with("````"),
953            "Should upgrade closing fence to 4 backticks"
954        );
955        // Content between fences should be preserved
956        assert!(fixed.contains("```json"), "Inner fences should be preserved as content");
957    }
958
959    #[test]
960    fn test_warning_is_fixable() {
961        // All MD070 warnings must have fix.is_some() so the fix coordinator calls fix()
962        let content = "```markdown\n```python\ncode()\n```\n```\n";
963        let result = run_check(content).unwrap();
964        assert_eq!(result.len(), 1);
965        assert!(
966            result[0].fix.is_some(),
967            "MD070 warnings must be marked fixable for the fix coordinator"
968        );
969    }
970
971    #[test]
972    fn test_fix_via_warning_struct_is_safe() {
973        // The Fix on warnings is used directly by the LSP code action path.
974        // It must produce valid output (not delete the fence or corrupt the file).
975        let content = "```markdown\n```python\ncode()\n```\n```\n";
976        let result = run_check(content).unwrap();
977        assert_eq!(result.len(), 1);
978
979        let fix = result[0].fix.as_ref().unwrap();
980        // Apply the Fix directly (simulating LSP path)
981        let mut fixed = String::new();
982        fixed.push_str(&content[..fix.range.start]);
983        fixed.push_str(&fix.replacement);
984        fixed.push_str(&content[fix.range.end..]);
985
986        // The fixed content should have upgraded fences
987        assert!(
988            fixed.contains("````markdown"),
989            "Direct Fix application should upgrade opening fence, got: {fixed}"
990        );
991        assert!(
992            fixed.contains("````\n") || fixed.ends_with("````"),
993            "Direct Fix application should upgrade closing fence, got: {fixed}"
994        );
995        // Content should be preserved
996        assert!(
997            fixed.contains("```python"),
998            "Inner content should be preserved, got: {fixed}"
999        );
1000    }
1001
1002    #[test]
1003    fn test_fix_via_warning_struct_python_block() {
1004        // Test the LSP code action path for a Python block where CommonMark's
1005        // closing fence differs from the user's intended closing fence.
1006        // CommonMark sees: ```python (line 1) closed by bare ``` (line 6).
1007        // User intended: ```python (line 1) closed by ``` (line 10).
1008        let content = "```python\ndef f():\n    text = \"\"\"\n```json\n{}\n```\n\"\"\"\n    print(text)\nf()\n```\n";
1009        let result = run_check(content).unwrap();
1010        assert_eq!(result.len(), 1);
1011
1012        let fix = result[0].fix.as_ref().unwrap();
1013        let mut fixed = String::new();
1014        fixed.push_str(&content[..fix.range.start]);
1015        fixed.push_str(&fix.replacement);
1016        fixed.push_str(&content[fix.range.end..]);
1017
1018        // The Fix must cover the full intended block (lines 1-10), not just
1019        // the CommonMark-visible block (lines 1-6). Verify the fixed content
1020        // has one code block containing ALL the Python code.
1021        assert!(
1022            fixed.starts_with("````python\n"),
1023            "Should upgrade opening fence, got:\n{fixed}"
1024        );
1025        assert!(
1026            fixed.contains("````\n") || fixed.trim_end().ends_with("````"),
1027            "Should upgrade closing fence, got:\n{fixed}"
1028        );
1029        // ALL Python code must be between the fences
1030        let fence_start = fixed.find("````python\n").unwrap();
1031        let after_open = fence_start + "````python\n".len();
1032        let close_pos = fixed[after_open..]
1033            .find("\n````\n")
1034            .or_else(|| fixed[after_open..].find("\n````"));
1035        assert!(
1036            close_pos.is_some(),
1037            "Should have closing fence after content, got:\n{fixed}"
1038        );
1039        let block_content = &fixed[after_open..after_open + close_pos.unwrap()];
1040        assert!(
1041            block_content.contains("print(text)"),
1042            "print(text) must be inside the code block, got block:\n{block_content}"
1043        );
1044        assert!(
1045            block_content.contains("f()"),
1046            "f() must be inside the code block, got block:\n{block_content}"
1047        );
1048        assert!(
1049            block_content.contains("```json"),
1050            "Inner fences must be preserved as content, got block:\n{block_content}"
1051        );
1052    }
1053
1054    #[test]
1055    fn test_fix_via_apply_warning_fixes() {
1056        // End-to-end test of the LSP fix path using apply_warning_fixes
1057        let content = "```markdown\n```python\ncode()\n```\n```\n";
1058        let result = run_check(content).unwrap();
1059        assert_eq!(result.len(), 1);
1060
1061        let fixed = crate::utils::fix_utils::apply_warning_fixes(content, &result).unwrap();
1062        assert!(
1063            fixed.contains("````markdown"),
1064            "apply_warning_fixes should upgrade opening fence"
1065        );
1066        assert!(
1067            fixed.contains("````\n") || fixed.ends_with("````"),
1068            "apply_warning_fixes should upgrade closing fence"
1069        );
1070
1071        // Re-check should find no issues
1072        let ctx2 = LintContext::new(&fixed, crate::config::MarkdownFlavor::Standard, None);
1073        let rule = MD070NestedCodeFence::new();
1074        let result2 = rule.check(&ctx2).unwrap();
1075        assert!(
1076            result2.is_empty(),
1077            "Re-check after LSP fix should find no issues, got: {:?}",
1078            result2.iter().map(|w| &w.message).collect::<Vec<_>>()
1079        );
1080    }
1081
1082    /// Helper: run fix() then check() on the result, asserting 0 violations remain
1083    fn assert_fix_roundtrip(content: &str, label: &str) {
1084        let fixed = run_fix(content).unwrap();
1085        let rule = MD070NestedCodeFence::new();
1086        let ctx = LintContext::new(&fixed, crate::config::MarkdownFlavor::Standard, None);
1087        let remaining = rule.check(&ctx).unwrap();
1088        assert!(
1089            remaining.is_empty(),
1090            "[{label}] fix() should resolve all violations, but {n} remain: {msgs:?}\nFixed content:\n{fixed}",
1091            n = remaining.len(),
1092            msgs = remaining.iter().map(|w| &w.message).collect::<Vec<_>>(),
1093        );
1094    }
1095
1096    #[test]
1097    fn test_fix_roundtrip_basic() {
1098        assert_fix_roundtrip("```markdown\n```python\ncode()\n```\n```\n", "basic collision");
1099    }
1100
1101    #[test]
1102    fn test_fix_roundtrip_longer_inner_fence() {
1103        assert_fix_roundtrip("```markdown\n`````python\ncode()\n`````\n```\n", "longer inner fence");
1104    }
1105
1106    #[test]
1107    fn test_fix_roundtrip_tilde_collision() {
1108        assert_fix_roundtrip("~~~markdown\n~~~python\ncode()\n~~~\n~~~\n", "tilde collision");
1109    }
1110
1111    #[test]
1112    fn test_fix_roundtrip_info_string_attrs() {
1113        assert_fix_roundtrip(
1114            "```markdown {.highlight}\n```python\ncode()\n```\n```\n",
1115            "info string with attrs",
1116        );
1117    }
1118
1119    #[test]
1120    fn test_fix_roundtrip_no_trailing_newline() {
1121        assert_fix_roundtrip("```markdown\n```python\ncode()\n```\n```", "no trailing newline");
1122    }
1123
1124    #[test]
1125    fn test_fix_roundtrip_python_triple_string() {
1126        assert_fix_roundtrip(
1127            "# Test\n\n```python\ndef f():\n    text = \"\"\"\n```json\n{}\n```\n\"\"\"\n```\n",
1128            "python triple string",
1129        );
1130    }
1131
1132    #[test]
1133    fn test_fix_roundtrip_deeply_nested() {
1134        assert_fix_roundtrip(
1135            "```markdown\n````markdown\n```python\ncode()\n```\n````\n```\n",
1136            "deeply nested fences",
1137        );
1138    }
1139
1140    #[test]
1141    fn test_fix_roundtrip_real_world_docs() {
1142        let content = r#"```markdown
11431. First item
1144
1145   ```python
1146   code_in_list()
1147   ```
1148
11491. Second item
1150
1151```
1152"#;
1153        assert_fix_roundtrip(content, "real world docs case");
1154    }
1155
1156    #[test]
1157    fn test_fix_roundtrip_empty_lines() {
1158        assert_fix_roundtrip(
1159            "```markdown\n\n\n```python\n\ncode()\n\n```\n\n```\n",
1160            "empty lines between fences",
1161        );
1162    }
1163
1164    #[test]
1165    fn test_fix_no_change_when_no_violations() {
1166        let content = "````markdown\n```python\ncode()\n```\n````\n";
1167        let fixed = run_fix(content).unwrap();
1168        assert_eq!(fixed, content, "fix() should not modify content with no violations");
1169    }
1170
1171    #[test]
1172    fn test_fix_roundtrip_consecutive_collisions() {
1173        let content = r#"```markdown
1174```python
1175a()
1176```
1177```
1178
1179```md
1180```ruby
1181b()
1182```
1183```
1184"#;
1185        // Fix and verify each collision is resolved
1186        let fixed = run_fix(content).unwrap();
1187        let rule = MD070NestedCodeFence::new();
1188        let ctx = LintContext::new(&fixed, crate::config::MarkdownFlavor::Standard, None);
1189        let remaining = rule.check(&ctx).unwrap();
1190        // At minimum the first block should be fixed; consecutive blocks may
1191        // require multiple passes but the first pass must not make things worse
1192        assert!(
1193            remaining.len() < 2,
1194            "fix() should resolve at least one collision, remaining: {remaining:?}",
1195        );
1196    }
1197}
rumdl_lib/rules/md070_nested_code_fence.rs

rumdl_lib/rules/
md070_nested_code_fence.rs