mdbook_lint_core/rules/standard/
md040.rs

//! MD040: Fenced code blocks should have a language specified
//!
//! This rule checks that every fenced code block specifies a language so that it can be
//! syntax highlighted. Indented code blocks are not checked.
4
5use crate::error::Result;
6use crate::rule::{AstRule, RuleCategory, RuleMetadata};
7use crate::{
8    Document,
9    violation::{Severity, Violation},
10};
11use comrak::nodes::{AstNode, NodeValue};
12
/// Lint rule MD040: reports fenced code blocks that do not declare a language.
///
/// This is a field-less unit type; the check itself is implemented in the
/// `AstRule` impl for this type.
pub struct MD040;
15
16impl AstRule for MD040 {
17    fn id(&self) -> &'static str {
18        "MD040"
19    }
20
21    fn name(&self) -> &'static str {
22        "fenced-code-language"
23    }
24
25    fn description(&self) -> &'static str {
26        "Fenced code blocks should have a language specified"
27    }
28
29    fn metadata(&self) -> RuleMetadata {
30        RuleMetadata::stable(RuleCategory::Content).introduced_in("mdbook-lint v0.1.0")
31    }
32
33    fn check_ast<'a>(&self, document: &Document, ast: &'a AstNode<'a>) -> Result<Vec<Violation>> {
34        let mut violations = Vec::new();
35
36        // Find all code block nodes
37        for node in ast.descendants() {
38            if let NodeValue::CodeBlock(code_block) = &node.data.borrow().value {
39                // Only check fenced code blocks (ignore indented code blocks)
40                if code_block.fenced {
41                    let info = code_block.info.trim();
42
43                    // Check if language is missing or empty
44                    if info.is_empty()
45                        && let Some((line, column)) = document.node_position(node)
46                    {
47                        violations.push(self.create_violation(
48                            "Fenced code block is missing language specification".to_string(),
49                            line,
50                            column,
51                            Severity::Warning,
52                        ));
53                    }
54                }
55            }
56        }
57
58        Ok(violations)
59    }
60}
61
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Document;
    use crate::rule::Rule;
    use crate::violation::Violation;
    use std::path::PathBuf;

    /// Runs MD040 over `content` (as a document named `test.md`) and returns its violations.
    ///
    /// Panics if the document cannot be built or the rule itself fails, since either
    /// indicates a broken test fixture rather than a lint finding.
    fn lint(content: &str) -> Vec<Violation> {
        let document = Document::new(content.to_string(), PathBuf::from("test.md"))
            .expect("test document should be constructible");
        MD040
            .check(&document)
            .expect("MD040 should run without error")
    }

    /// Fenced blocks that all declare a language produce no violations.
    #[test]
    fn test_md040_no_violations() {
        let content = r#"# Valid Code Blocks

These code blocks have language tags and should not trigger violations:

```rust
fn main() {
    println!("Hello, world!");
}
```

```python
def hello():
    print("Hello, world!")
```

```markdown
# This is markdown
```

```json
{
    "key": "value"
}
```

Some text here.
"#;
        let violations = lint(content);

        assert_eq!(violations.len(), 0);
    }

    /// A single fenced block without a language is reported at its opening fence.
    #[test]
    fn test_md040_missing_language_violation() {
        let content = r#"# Document with Missing Language

This code block is missing a language specification:

```
function hello() {
    console.log("Hello, world!");
}
```

Some content here.
"#;
        let violations = lint(content);

        assert_eq!(violations.len(), 1);
        assert!(
            violations[0]
                .message
                .contains("Fenced code block is missing language specification")
        );
        assert_eq!(violations[0].line, 5);
    }

    /// Every language-less fenced block is reported, in document order.
    #[test]
    fn test_md040_multiple_missing_languages() {
        let content = r#"# Multiple Missing Languages

First code block without language:

```
console.log("First block");
```

Some text in between.

```rust
fn main() {
    println!("This one has language");
}
```

Second code block without language:

```
print("Second block")
```

More text.

```
# Third block without language
echo "hello"
```
"#;
        let violations = lint(content);

        assert_eq!(violations.len(), 3);
        assert_eq!(violations[0].line, 5);
        assert_eq!(violations[1].line, 19);
        assert_eq!(violations[2].line, 25);
    }

    /// Indented code blocks are out of scope for this rule.
    #[test]
    fn test_md040_indented_code_blocks_ignored() {
        let content = r#"# Indented Code Blocks

This is an indented code block that should be ignored:

    function hello() {
        console.log("This is indented, not fenced");
    }

But this fenced block without language should be detected:

```
function hello() {
    console.log("This is fenced without language");
}
```

And this indented one should still be ignored:

    def hello():
        print("Still indented")
"#;
        let violations = lint(content);

        // Should only detect the fenced code block, not the indented ones
        assert_eq!(violations.len(), 1);
        assert_eq!(violations[0].line, 11);
    }

    /// An info string consisting only of whitespace counts as a missing language.
    #[test]
    fn test_md040_whitespace_only_info() {
        // Built line by line so the trailing spaces after the opening fence are
        // explicit and cannot be silently stripped by an editor or formatter.
        let content = concat!(
            "# Code Block with Whitespace\n",
            "\n",
            "This code block has only whitespace in the info string:\n",
            "\n",
            "```   \n",
            "function hello() {\n",
            "    console.log(\"Whitespace only info\");\n",
            "}\n",
            "```\n",
            "\n",
            "This should also be detected as missing language.\n",
        );
        let violations = lint(content);

        assert_eq!(violations.len(), 1);
        assert_eq!(violations[0].line, 5);
    }

    /// Backtick and tilde fences are treated the same way.
    #[test]
    fn test_md040_mixed_fenced_styles() {
        let content = r#"# Mixed Fenced Styles

Backtick fenced block without language:

```
console.log("backticks");
```

Tilde fenced block without language:

~~~
console.log("tildes");
~~~

Tilde fenced block with language:

~~~javascript
console.log("tildes with language");
~~~
"#;
        let violations = lint(content);

        assert_eq!(violations.len(), 2);
        assert_eq!(violations[0].line, 5);
        assert_eq!(violations[1].line, 11);
    }

    /// Empty fenced blocks are still checked for a language.
    #[test]
    fn test_md040_empty_code_blocks() {
        let content = r#"# Empty Code Blocks

Empty fenced block without language:

```
```

Empty fenced block with language:

```bash
```

Another empty block without language:

```

```
"#;
        let violations = lint(content);

        assert_eq!(violations.len(), 2);
        assert_eq!(violations[0].line, 5);
        assert_eq!(violations[1].line, 15);
    }

    /// A language followed by attributes (`rust,no_run`) satisfies the rule; a bare
    /// fence in the same document is still reported.
    #[test]
    fn test_md040_language_with_attributes() {
        let content = r#"# Code Blocks with Attributes

Code block with language and attributes should be fine:

```rust,no_run
fn main() {
    println!("Hello, world!");
}
```

Code block with neither language nor attributes should be detected:

```
function hello() {
    console.log("Hello, world!");
}
```
"#;
        let violations = lint(content);

        // Only the bare fence is reported; `rust,no_run` is a non-empty info string.
        assert_eq!(violations.len(), 1);
        assert_eq!(violations[0].line, 13);
    }
}