quickmark_core/rules/
md040.rs

1use serde::Deserialize;
2use std::collections::HashSet;
3use std::rc::Rc;
4use tree_sitter::Node;
5
6use crate::{
7    linter::{CharPosition, Context, Range, RuleLinter, RuleViolation},
8    rules::{Rule, RuleType},
9};
10
11// MD040-specific configuration types
/// Settings for the MD040 (`fenced-code-language`) rule.
///
/// Each field falls back to its `Default` value when it is missing from the
/// deserialized input.
#[derive(Debug, PartialEq, Clone, Deserialize, Default)]
pub struct MD040FencedCodeLanguageTable {
    /// Languages accepted on fenced code blocks. An empty list disables the
    /// allow-list check; otherwise the language must match an entry exactly
    /// (the comparison is case-sensitive).
    #[serde(default)]
    pub allowed_languages: Vec<String>,
    /// When `true`, an info string carrying anything after the language
    /// (separated by whitespace) is reported.
    #[serde(default)]
    pub language_only: bool,
}
19
/// Linter state for MD040: the shared lint context plus the violations
/// collected for the current document.
pub(crate) struct MD040Linter {
    /// Shared context; supplies the rule settings, the node cache, the
    /// document lines and the file path used when reporting.
    context: Rc<Context>,
    /// Violations accumulated by `finalize` and handed out from there.
    violations: Vec<RuleViolation>,
}
24
25impl MD040Linter {
26    pub fn new(context: Rc<Context>) -> Self {
27        Self {
28            context,
29            violations: Vec::new(),
30        }
31    }
32
33    /// Extracts the language identifier from a fenced code block's first line.
34    /// This handles common variations like attributes (e.g., ```rust{{...}}).
35    /// Returns `(Option<language>, has_extra_info)`. The language is a slice
36    /// of the input line to avoid allocations.
37    fn extract_code_block_language<'a>(&self, line: &'a str) -> (Option<&'a str>, bool) {
38        let trimmed = line.trim_start();
39        let marker = if trimmed.starts_with("```") {
40            "```"
41        } else if trimmed.starts_with("~~~") {
42            "~~~"
43        } else {
44            return (None, false);
45        };
46
47        let info_string = trimmed[marker.len()..].trim();
48
49        if info_string.is_empty() {
50            return (None, false);
51        }
52
53        let mut parts = info_string.split_whitespace();
54        // The unwrap is safe because we've checked that info_string is not empty.
55        let language_part = parts.next().unwrap();
56        let has_extra_info = parts.next().is_some();
57
58        // The unwrap is safe because split always returns an iterator with at least one element.
59        let language = language_part.split('{').next().unwrap();
60
61        if language.is_empty() {
62            (None, has_extra_info)
63        } else {
64            (Some(language), has_extra_info)
65        }
66    }
67}
68
69impl RuleLinter for MD040Linter {
70    fn feed(&mut self, _node: &Node) {
71        // MD040 uses Document pattern, not Token pattern
72        // All processing happens in finalize()
73    }
74
75    fn finalize(&mut self) -> Vec<RuleViolation> {
76        let config = &self.context.config.linters.settings.fenced_code_language;
77        let node_cache = self.context.node_cache.borrow();
78        let lines = self.context.lines.borrow();
79
80        // For performance, convert allowed_languages to a HashSet if it's not empty.
81        let allowed_languages_set: Option<HashSet<&str>> = if !config.allowed_languages.is_empty() {
82            Some(
83                config
84                    .allowed_languages
85                    .iter()
86                    .map(String::as_str)
87                    .collect(),
88            )
89        } else {
90            None
91        };
92
93        if let Some(fenced_code_blocks) = node_cache.get("fenced_code_block") {
94            for node_info in fenced_code_blocks {
95                if let Some(first_line) = lines.get(node_info.line_start) {
96                    let (language_opt, has_extra_info) =
97                        self.extract_code_block_language(first_line);
98
99                    let range = Range {
100                        start: CharPosition {
101                            line: node_info.line_start,
102                            character: 0,
103                        },
104                        end: CharPosition {
105                            line: node_info.line_start,
106                            character: first_line.len(),
107                        },
108                    };
109
110                    let language = match language_opt {
111                        Some(lang) => lang,
112                        None => {
113                            self.violations.push(RuleViolation::new(
114                                &MD040,
115                                "Fenced code blocks should have a language specified".to_string(),
116                                self.context.file_path.clone(),
117                                range,
118                            ));
119                            continue;
120                        }
121                    };
122
123                    if let Some(set) = &allowed_languages_set {
124                        if !set.contains(language) {
125                            self.violations.push(RuleViolation::new(
126                                &MD040,
127                                format!("\"{language}\" is not allowed"),
128                                self.context.file_path.clone(),
129                                range,
130                            ));
131                            continue;
132                        }
133                    }
134
135                    // Check if language_only is true and there's extra metadata
136                    if config.language_only && has_extra_info {
137                        let range = Range {
138                            start: CharPosition {
139                                line: node_info.line_start,
140                                character: 0,
141                            },
142                            end: CharPosition {
143                                line: node_info.line_start,
144                                character: first_line.len(),
145                            },
146                        };
147                        let violation = RuleViolation::new(
148                            &MD040,
149                            format!(
150                                "Info string contains more than language: \"{}\"",
151                                first_line.trim()
152                            ),
153                            self.context.file_path.clone(),
154                            range,
155                        );
156                        self.violations.push(violation);
157                    }
158                }
159            }
160        }
161
162        std::mem::take(&mut self.violations)
163    }
164}
165
/// Rule descriptor for MD040 (`fenced-code-language`).
///
/// Declares the rule as a document-type rule that lists `fenced_code_block`
/// as its required node kind, and provides the factory that builds an
/// `MD040Linter` for a given context.
pub const MD040: Rule = Rule {
    id: "MD040",
    alias: "fenced-code-language",
    tags: &["code", "language"],
    description: "Fenced code blocks should have a language specified",
    rule_type: RuleType::Document,
    required_nodes: &["fenced_code_block"],
    // Factory: each context gets its own linter instance.
    new_linter: |context| Box::new(MD040Linter::new(context)),
};
175
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{
        LintersSettingsTable, MD040FencedCodeLanguageTable, QuickmarkConfig, RuleSeverity,
    };
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_settings;

    /// Builds a config with MD040 enabled as an error and the given rule settings.
    fn md040_config(allowed_languages: &[&str], language_only: bool) -> QuickmarkConfig {
        test_config_with_settings(
            vec![("fenced-code-language", RuleSeverity::Error)],
            LintersSettingsTable {
                fenced_code_language: MD040FencedCodeLanguageTable {
                    allowed_languages: allowed_languages
                        .iter()
                        .map(|lang| lang.to_string())
                        .collect(),
                    language_only,
                },
                ..Default::default()
            },
        )
    }

    /// Lints `input` as `test.md` and returns the messages of the MD040 violations only.
    fn md040_messages(config: QuickmarkConfig, input: &str) -> Vec<String> {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        violations
            .iter()
            .filter(|violation| violation.rule().id == "MD040")
            .map(|violation| violation.message().to_string())
            .collect()
    }

    #[test]
    fn test_fenced_code_with_language_no_violations() {
        let input = "# Test

```rust
fn main() {
    println!(\"Hello, World!\");
}
```

```javascript
console.log('Hello, World!');
```

```text
Plain text content
```";

        let messages = md040_messages(md040_config(&[], false), input);

        // Every block names a language
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_fenced_code_without_language_violations() {
        let input = "# Test

```
def hello():
    print(\"Hello, World!\")
```

```rust
fn main() {
    println!(\"Hello, World!\");
}
```

```
console.log('Hello, World!');
```";

        let messages = md040_messages(md040_config(&[], false), input);

        // The first and the last block have no language
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_allowed_languages_specific_list() {
        let input = "# Test

```rust
fn main() {}
```

```python
def hello(): pass
```

```javascript
console.log('not allowed');
```

```
no language specified
```";

        let messages = md040_messages(md040_config(&["rust", "python"], false), input);

        // javascript is outside the allow-list and the last block has no language
        assert_eq!(messages.len(), 2);
        assert!(messages
            .iter()
            .any(|message| message.contains("javascript")));
    }

    #[test]
    fn test_language_only_option_no_extra_info() {
        let input = "# Test

```rust
fn main() {}
```

```python {.line-numbers}
def hello(): pass
```

```javascript copy
console.log('Hello');
```";

        let messages = md040_messages(md040_config(&[], true), input);

        // The python and javascript blocks carry more than a language
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_language_only_option_language_only_allowed() {
        let input = "# Test

```rust
fn main() {}
```

```python
def hello(): pass
```";

        let messages = md040_messages(md040_config(&[], true), input);

        // Both info strings consist of the language alone
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_combined_options() {
        let input = "# Test

```rust
fn main() {}
```

```python copy
def hello(): pass
```

```javascript
console.log('Hello');
```

```
no language
```";

        let messages = md040_messages(md040_config(&["rust", "python"], true), input);

        // Expected violations:
        // 1. python carries extra info (language_only)
        // 2. javascript is outside the allow-list
        // 3. the last block has no language
        assert_eq!(messages.len(), 3);
    }

    #[test]
    fn test_indented_code_blocks_ignored() {
        let input = "# Test

    def hello():
        print(\"This is indented code\")

```
def hello():
    print(\"This is fenced code without language\")
```";

        let messages = md040_messages(md040_config(&[], false), input);

        // Only the fenced block counts; the indented code block is ignored
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_case_sensitivity_in_languages() {
        let input = "# Test

```Rust
fn main() {}
```

```python
def hello(): pass
```

```PYTHON
def hello(): pass
```";

        let messages = md040_messages(md040_config(&["rust", "PYTHON"], false), input);

        // "Rust" and "python" differ in case from the allowed entries
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_empty_fenced_code_blocks() {
        let input = "# Test

```

```

```rust

```";

        let messages = md040_messages(md040_config(&[], false), input);

        // Only the first (language-less) block is reported
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_tildes_fenced_code_blocks() {
        let input = "# Test

~~~
def hello():
    print(\"Hello\")
~~~

~~~python
def hello():
    print(\"Hello\")
~~~";

        let messages = md040_messages(md040_config(&[], false), input);

        // Only the first (language-less) tilde block is reported
        assert_eq!(messages.len(), 1);
    }
}