Skip to main content

rumdl_lib/rules/md061_forbidden_terms/
mod.rs

1use crate::filtered_lines::FilteredLinesExt;
2use regex::{Regex, RegexBuilder};
3
4use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6
7mod md061_config;
8pub use md061_config::MD061Config;
9
10/// Rule MD061: Forbidden terms
11///
12/// See [docs/md061.md](../../docs/md061.md) for full documentation, configuration, and examples.
13
14#[derive(Debug, Clone, Default)]
15pub struct MD061ForbiddenTerms {
16    config: MD061Config,
17    pattern: Option<Regex>,
18}
19
20impl MD061ForbiddenTerms {
21    pub fn new(terms: Vec<String>, case_sensitive: bool) -> Self {
22        let config = MD061Config { terms, case_sensitive };
23        let pattern = Self::build_pattern(&config);
24        Self { config, pattern }
25    }
26
27    pub fn from_config_struct(config: MD061Config) -> Self {
28        let pattern = Self::build_pattern(&config);
29        Self { config, pattern }
30    }
31
32    fn build_pattern(config: &MD061Config) -> Option<Regex> {
33        if config.terms.is_empty() {
34            return None;
35        }
36
37        // Build alternation pattern from terms, escaping regex metacharacters
38        let escaped_terms: Vec<String> = config.terms.iter().map(|term| regex::escape(term)).collect();
39        let pattern_str = escaped_terms.join("|");
40
41        RegexBuilder::new(&pattern_str)
42            .case_insensitive(!config.case_sensitive)
43            .build()
44            .ok()
45    }
46
47    /// Check if match is at a word boundary
48    fn is_word_boundary(content: &str, start: usize, end: usize) -> bool {
49        let before_ok = if start == 0 {
50            true
51        } else {
52            content[..start]
53                .chars()
54                .last()
55                .map(|c| !c.is_alphanumeric() && c != '_')
56                .unwrap_or(true)
57        };
58
59        let after_ok = if end >= content.len() {
60            true
61        } else {
62            content[end..]
63                .chars()
64                .next()
65                .map(|c| !c.is_alphanumeric() && c != '_')
66                .unwrap_or(true)
67        };
68
69        before_ok && after_ok
70    }
71}
72
73impl Rule for MD061ForbiddenTerms {
    /// Returns the rule identifier, `"MD061"`.
    fn name(&self) -> &'static str {
        "MD061"
    }
77
    /// Returns the short human-readable description of the rule.
    fn description(&self) -> &'static str {
        "Forbidden terms"
    }
81
82    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
83        // Early return if no terms configured
84        let pattern = match &self.pattern {
85            Some(p) => p,
86            None => return Ok(Vec::new()),
87        };
88
89        let mut warnings = Vec::new();
90
91        // Use filtered_lines to skip frontmatter, code blocks, HTML comments, and Obsidian comments
92        for line in ctx
93            .filtered_lines()
94            .skip_front_matter()
95            .skip_code_blocks()
96            .skip_html_comments()
97            .skip_jsx_expressions()
98            .skip_mdx_comments()
99            .skip_obsidian_comments()
100        {
101            let content = line.content;
102
103            // Find all matches in this line
104            for mat in pattern.find_iter(content) {
105                // Skip if inside inline code (col is 1-indexed)
106                if ctx.is_in_code_span(line.line_num, mat.start() + 1) {
107                    continue;
108                }
109
110                // Check word boundaries
111                if !Self::is_word_boundary(content, mat.start(), mat.end()) {
112                    continue;
113                }
114
115                let matched_term = &content[mat.start()..mat.end()];
116                let display_term = if self.config.case_sensitive {
117                    matched_term.to_string()
118                } else {
119                    matched_term.to_uppercase()
120                };
121
122                warnings.push(LintWarning {
123                    rule_name: Some(self.name().to_string()),
124                    severity: Severity::Warning,
125                    message: format!("Found forbidden term '{display_term}'"),
126                    line: line.line_num,
127                    column: mat.start() + 1,
128                    end_line: line.line_num,
129                    end_column: mat.end() + 1,
130                    fix: None, // No auto-fix for warning comments
131                });
132            }
133        }
134
135        Ok(warnings)
136    }
137
    /// Returns the document content unchanged; this rule does not rewrite anything.
    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
        Ok(ctx.content.to_string())
    }
141
    /// Classifies this rule as `RuleCategory::Other`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Other
    }
145
    /// Declares the rule as `FixCapability::Unfixable`.
    fn fix_capability(&self) -> FixCapability {
        FixCapability::Unfixable
    }
149
    /// Returns this rule as a `&dyn Any` reference.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
153
    /// Reports whether the rule can be skipped, which is the case exactly when
    /// the configured term list is empty. The lint context is not consulted.
    fn should_skip(&self, _ctx: &crate::lint_context::LintContext) -> bool {
        // Skip if no terms configured
        self.config.terms.is_empty()
    }
158
159    fn default_config_section(&self) -> Option<(String, toml::Value)> {
160        let default_config = MD061Config::default();
161        let json_value = serde_json::to_value(&default_config).ok()?;
162        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
163
164        if let toml::Value::Table(table) = toml_value {
165            if !table.is_empty() {
166                Some((MD061Config::RULE_NAME.to_string(), toml::Value::Table(table)))
167            } else {
168                None
169            }
170        } else {
171            None
172        }
173    }
174
175    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
176    where
177        Self: Sized,
178    {
179        let rule_config = crate::rule_config_serde::load_rule_config::<MD061Config>(config);
180        Box::new(Self::from_config_struct(rule_config))
181    }
182}
183
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Three spellings of the same term, one per line; only line 2 is upper-case.
    const MIXED_CASE_TODOS: &str = "todo: lowercase\nTODO: uppercase\nTodo: mixed\n";

    /// Lints `content` as standard-flavor Markdown and returns the warnings.
    fn lint(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    /// Builds a case-insensitive rule forbidding each of `terms`.
    fn insensitive(terms: &[&str]) -> MD061ForbiddenTerms {
        MD061ForbiddenTerms::new(terms.iter().map(|term| term.to_string()).collect(), false)
    }

    /// Builds the rule from a config whose `MD061` section only sets `terms = [term]`.
    fn rule_from_toml_term(term: &str) -> Box<dyn Rule> {
        let mut rule_config = crate::config::RuleConfig::default();
        rule_config.values.insert(
            "terms".to_string(),
            toml::Value::Array(vec![toml::Value::String(term.to_string())]),
        );

        let mut config = crate::config::Config::default();
        config.rules.insert("MD061".to_string(), rule_config);

        MD061ForbiddenTerms::from_config(&config)
    }

    #[test]
    fn test_empty_config_no_warnings() {
        let rule = MD061ForbiddenTerms::default();
        let result = lint(&rule, "# TODO: This should not trigger\n\nFIXME: This too\n");
        assert!(result.is_empty());
    }

    #[test]
    fn test_configured_terms_detected() {
        let rule = insensitive(&["TODO", "FIXME"]);
        let result = lint(&rule, "# Heading\n\nTODO: Implement this\n\nFIXME: Fix this bug\n");
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("forbidden term"));
        assert!(result[0].message.contains("TODO"));
        assert!(result[1].message.contains("forbidden term"));
        assert!(result[1].message.contains("FIXME"));
    }

    #[test]
    fn test_case_sensitive_by_default() {
        // The default config is case-sensitive, so only the exact spelling "TODO" matches
        let config = MD061Config {
            terms: vec!["TODO".to_string()],
            ..Default::default()
        };
        let rule = MD061ForbiddenTerms::from_config_struct(config);
        let result = lint(&rule, MIXED_CASE_TODOS);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2); // Only "TODO" on line 2 matches
    }

    #[test]
    fn test_case_insensitive_opt_in() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, MIXED_CASE_TODOS);
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_case_sensitive_mode() {
        let rule = MD061ForbiddenTerms::new(vec!["TODO".to_string()], true);
        let result = lint(&rule, MIXED_CASE_TODOS);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_word_boundary_no_false_positive() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "TODOMORROW is not a match\nTODO is a match\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_word_boundary_with_punctuation() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "TODO: colon\nTODO. period\n(TODO) parens\n");
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_skip_fenced_code_block() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "# Heading\n\n```\nTODO: in code block\n```\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 7);
    }

    #[test]
    fn test_skip_indented_code_block() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "# Heading\n\n    TODO: in indented code\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_skip_inline_code() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "Here is `TODO` in inline code\nTODO: outside inline\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_frontmatter() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "---\ntitle: TODO in frontmatter\n---\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_multiple_terms_on_same_line() {
        let rule = insensitive(&["TODO", "FIXME"]);
        let result = lint(&rule, "TODO: first thing FIXME: second thing\n");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_term_at_start_of_line() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "TODO at start\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 1);
    }

    #[test]
    fn test_term_at_end_of_line() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "something TODO\n");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_custom_terms() {
        let rule = insensitive(&["HACK", "XXX"]);
        let result = lint(&rule, "HACK: workaround\nXXX: needs review\nTODO: not configured\n");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_no_fix_available() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "TODO: something\n");
        assert_eq!(result.len(), 1);
        assert!(result[0].fix.is_none());
    }

    #[test]
    fn test_column_positions() {
        let rule = insensitive(&["TODO"]);
        // Indent by 2 spaces, not 4 (4 spaces would start an indented code block)
        let result = lint(&rule, "  TODO: indented\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 3); // 1-based column, TODO starts at col 3
        assert_eq!(result[0].end_column, 7);
    }

    #[test]
    fn test_config_from_toml() {
        let rule = rule_from_toml_term("FIXME");
        let result = lint(&*rule, "FIXME: configured\nTODO: not configured\n");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("forbidden term"));
        assert!(result[0].message.contains("FIXME"));
    }

    #[test]
    fn test_config_from_toml_case_sensitive_by_default() {
        // Simulates user config: [MD061] terms = ["TODO"]
        // With case_sensitive left unset, matching should default to case-sensitive
        let rule = rule_from_toml_term("TODO");
        let result = lint(&*rule, MIXED_CASE_TODOS);

        // Should only match "TODO" (uppercase), not "todo" or "Todo"
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_html_comment() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "<!-- TODO: in html comment -->\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_double_backtick_inline_code() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "Here is ``TODO`` in double backticks\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_triple_backtick_inline_code() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "Here is ```TODO``` in triple backticks\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_inline_code_with_backtick_content() {
        let rule = insensitive(&["TODO"]);
        // The code span itself contains backticks: `` `TODO` ``
        let result = lint(&rule, "Use `` `TODO` `` to show a backtick\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }
}