1use serde::Deserialize;
2use std::collections::HashSet;
3use std::rc::Rc;
4use tree_sitter::Node;
5
6use crate::{
7 linter::{CharPosition, Context, Range, RuleLinter, RuleViolation},
8 rules::{Rule, RuleType},
9};
10
11#[derive(Debug, PartialEq, Clone, Deserialize, Default)]
13pub struct MD040FencedCodeLanguageTable {
14 #[serde(default)]
15 pub allowed_languages: Vec<String>,
16 #[serde(default)]
17 pub language_only: bool,
18}
19
20pub(crate) struct MD040Linter {
21 context: Rc<Context>,
22 violations: Vec<RuleViolation>,
23}
24
25impl MD040Linter {
26 pub fn new(context: Rc<Context>) -> Self {
27 Self {
28 context,
29 violations: Vec::new(),
30 }
31 }
32
33 fn extract_code_block_language<'a>(&self, line: &'a str) -> (Option<&'a str>, bool) {
38 let trimmed = line.trim_start();
39 let marker = if trimmed.starts_with("```") {
40 "```"
41 } else if trimmed.starts_with("~~~") {
42 "~~~"
43 } else {
44 return (None, false);
45 };
46
47 let info_string = trimmed[marker.len()..].trim();
48
49 if info_string.is_empty() {
50 return (None, false);
51 }
52
53 let mut parts = info_string.split_whitespace();
54 let language_part = parts.next().unwrap();
56 let has_extra_info = parts.next().is_some();
57
58 let language = language_part.split('{').next().unwrap();
60
61 if language.is_empty() {
62 (None, has_extra_info)
63 } else {
64 (Some(language), has_extra_info)
65 }
66 }
67}
68
69impl RuleLinter for MD040Linter {
70 fn feed(&mut self, _node: &Node) {
71 }
74
75 fn finalize(&mut self) -> Vec<RuleViolation> {
76 let config = &self.context.config.linters.settings.fenced_code_language;
77 let node_cache = self.context.node_cache.borrow();
78 let lines = self.context.lines.borrow();
79
80 let allowed_languages_set: Option<HashSet<&str>> = if !config.allowed_languages.is_empty() {
82 Some(
83 config
84 .allowed_languages
85 .iter()
86 .map(String::as_str)
87 .collect(),
88 )
89 } else {
90 None
91 };
92
93 if let Some(fenced_code_blocks) = node_cache.get("fenced_code_block") {
94 for node_info in fenced_code_blocks {
95 if let Some(first_line) = lines.get(node_info.line_start) {
96 let (language_opt, has_extra_info) =
97 self.extract_code_block_language(first_line);
98
99 let range = Range {
100 start: CharPosition {
101 line: node_info.line_start,
102 character: 0,
103 },
104 end: CharPosition {
105 line: node_info.line_start,
106 character: first_line.len(),
107 },
108 };
109
110 let language = match language_opt {
111 Some(lang) => lang,
112 None => {
113 self.violations.push(RuleViolation::new(
114 &MD040,
115 "Fenced code blocks should have a language specified".to_string(),
116 self.context.file_path.clone(),
117 range,
118 ));
119 continue;
120 }
121 };
122
123 if let Some(set) = &allowed_languages_set {
124 if !set.contains(language) {
125 self.violations.push(RuleViolation::new(
126 &MD040,
127 format!("\"{language}\" is not allowed"),
128 self.context.file_path.clone(),
129 range,
130 ));
131 continue;
132 }
133 }
134
135 if config.language_only && has_extra_info {
137 let range = Range {
138 start: CharPosition {
139 line: node_info.line_start,
140 character: 0,
141 },
142 end: CharPosition {
143 line: node_info.line_start,
144 character: first_line.len(),
145 },
146 };
147 let violation = RuleViolation::new(
148 &MD040,
149 format!(
150 "Info string contains more than language: \"{}\"",
151 first_line.trim()
152 ),
153 self.context.file_path.clone(),
154 range,
155 );
156 self.violations.push(violation);
157 }
158 }
159 }
160 }
161
162 std::mem::take(&mut self.violations)
163 }
164}
165
166pub const MD040: Rule = Rule {
167 id: "MD040",
168 alias: "fenced-code-language",
169 tags: &["code", "language"],
170 description: "Fenced code blocks should have a language specified",
171 rule_type: RuleType::Document,
172 required_nodes: &["fenced_code_block"],
173 new_linter: |context| Box::new(MD040Linter::new(context)),
174};
175
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, MD040FencedCodeLanguageTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_settings;

    /// Config with `fenced-code-language` at `Error` severity and both MD040 options set
    /// explicitly; every other linter setting keeps its default.
    fn test_config_with_both_options(
        allowed_languages: Vec<&str>,
        language_only: bool,
    ) -> crate::config::QuickmarkConfig {
        let fenced_code_language = MD040FencedCodeLanguageTable {
            allowed_languages: allowed_languages.into_iter().map(String::from).collect(),
            language_only,
        };

        test_config_with_settings(
            vec![("fenced-code-language", RuleSeverity::Error)],
            LintersSettingsTable {
                fenced_code_language,
                ..Default::default()
            },
        )
    }

    /// Config with no allowed-language restriction and `language_only` off.
    fn test_config_default() -> crate::config::QuickmarkConfig {
        test_config_with_both_options(vec![], false)
    }

    /// Config restricting languages to `allowed_languages`, with `language_only` off.
    fn test_config_with_allowed_languages(
        allowed_languages: Vec<&str>,
    ) -> crate::config::QuickmarkConfig {
        test_config_with_both_options(allowed_languages, false)
    }

    /// Config with no allowed-language restriction and the given `language_only` flag.
    fn test_config_with_language_only(language_only: bool) -> crate::config::QuickmarkConfig {
        test_config_with_both_options(vec![], language_only)
    }

    /// Lints `input` under the file name `test.md` and returns the messages of the
    /// violations whose rule id is `MD040`.
    fn md040_messages(config: crate::config::QuickmarkConfig, input: &str) -> Vec<String> {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        violations
            .iter()
            .filter(|violation| violation.rule().id == "MD040")
            .map(|violation| violation.message().to_string())
            .collect()
    }

    #[test]
    fn test_fenced_code_with_language_no_violations() {
        let input = "# Test

```rust
fn main() {
    println!(\"Hello, World!\");
}
```

```javascript
console.log('Hello, World!');
```

```text
Plain text content
```";

        let messages = md040_messages(test_config_default(), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_fenced_code_without_language_violations() {
        let input = "# Test

```
def hello():
    print(\"Hello, World!\")
```

```rust
fn main() {
    println!(\"Hello, World!\");
}
```

```
console.log('Hello, World!');
```";

        let messages = md040_messages(test_config_default(), input);
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_allowed_languages_specific_list() {
        let input = "# Test

```rust
fn main() {}
```

```python
def hello(): pass
```

```javascript
console.log('not allowed');
```

```
no language specified
```";

        let config = test_config_with_allowed_languages(vec!["rust", "python"]);
        let messages = md040_messages(config, input);

        assert_eq!(messages.len(), 2);
        assert!(messages.iter().any(|message| message.contains("javascript")));
    }

    #[test]
    fn test_language_only_option_no_extra_info() {
        let input = "# Test

```rust
fn main() {}
```

```python {.line-numbers}
def hello(): pass
```

```javascript copy
console.log('Hello');
```";

        let messages = md040_messages(test_config_with_language_only(true), input);
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_language_only_option_language_only_allowed() {
        let input = "# Test

```rust
fn main() {}
```

```python
def hello(): pass
```";

        let messages = md040_messages(test_config_with_language_only(true), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_combined_options() {
        let input = "# Test

```rust
fn main() {}
```

```python copy
def hello(): pass
```

```javascript
console.log('Hello');
```

```
no language
```";

        let config = test_config_with_both_options(vec!["rust", "python"], true);
        let messages = md040_messages(config, input);
        assert_eq!(messages.len(), 3);
    }

    #[test]
    fn test_indented_code_blocks_ignored() {
        let input = "# Test

    def hello():
        print(\"This is indented code\")

```
def hello():
    print(\"This is fenced code without language\")
```";

        let messages = md040_messages(test_config_default(), input);
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_case_sensitivity_in_languages() {
        let input = "# Test

```Rust
fn main() {}
```

```python
def hello(): pass
```

```PYTHON
def hello(): pass
```";

        let config = test_config_with_allowed_languages(vec!["rust", "PYTHON"]);
        let messages = md040_messages(config, input);
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_empty_fenced_code_blocks() {
        let input = "# Test

```

```

```rust

```";

        let messages = md040_messages(test_config_default(), input);
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_tildes_fenced_code_blocks() {
        let input = "# Test

~~~
def hello():
    print(\"Hello\")
~~~

~~~python
def hello():
    print(\"Hello\")
~~~";

        let messages = md040_messages(test_config_default(), input);
        assert_eq!(messages.len(), 1);
    }
}