rumdl_lib/rules/md061_forbidden_terms/
mod.rs

1use crate::filtered_lines::FilteredLinesExt;
2use regex::{Regex, RegexBuilder};
3
4use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rule_config_serde::RuleConfig;
6
7mod md061_config;
8pub use md061_config::MD061Config;
9
10/// Rule MD061: Forbidden terms
11///
12/// See [docs/md061.md](../../docs/md061.md) for full documentation, configuration, and examples.
13
14#[derive(Debug, Clone, Default)]
15pub struct MD061ForbiddenTerms {
16    config: MD061Config,
17    pattern: Option<Regex>,
18}
19
20impl MD061ForbiddenTerms {
21    pub fn new(terms: Vec<String>, case_sensitive: bool) -> Self {
22        let config = MD061Config { terms, case_sensitive };
23        let pattern = Self::build_pattern(&config);
24        Self { config, pattern }
25    }
26
27    pub fn from_config_struct(config: MD061Config) -> Self {
28        let pattern = Self::build_pattern(&config);
29        Self { config, pattern }
30    }
31
32    fn build_pattern(config: &MD061Config) -> Option<Regex> {
33        if config.terms.is_empty() {
34            return None;
35        }
36
37        // Build alternation pattern from terms, escaping regex metacharacters
38        let escaped_terms: Vec<String> = config.terms.iter().map(|term| regex::escape(term)).collect();
39        let pattern_str = escaped_terms.join("|");
40
41        RegexBuilder::new(&pattern_str)
42            .case_insensitive(!config.case_sensitive)
43            .build()
44            .ok()
45    }
46
47    /// Check if match is at a word boundary
48    fn is_word_boundary(content: &str, start: usize, end: usize) -> bool {
49        let before_ok = if start == 0 {
50            true
51        } else {
52            content[..start]
53                .chars()
54                .last()
55                .map(|c| !c.is_alphanumeric() && c != '_')
56                .unwrap_or(true)
57        };
58
59        let after_ok = if end >= content.len() {
60            true
61        } else {
62            content[end..]
63                .chars()
64                .next()
65                .map(|c| !c.is_alphanumeric() && c != '_')
66                .unwrap_or(true)
67        };
68
69        before_ok && after_ok
70    }
71}
72
73impl Rule for MD061ForbiddenTerms {
74    fn name(&self) -> &'static str {
75        "MD061"
76    }
77
78    fn description(&self) -> &'static str {
79        "Forbidden terms"
80    }
81
82    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
83        // Early return if no terms configured
84        let pattern = match &self.pattern {
85            Some(p) => p,
86            None => return Ok(Vec::new()),
87        };
88
89        let mut warnings = Vec::new();
90
91        // Use filtered_lines to skip frontmatter, code blocks, and HTML comments
92        for line in ctx
93            .filtered_lines()
94            .skip_front_matter()
95            .skip_code_blocks()
96            .skip_html_comments()
97        {
98            let content = line.content;
99
100            // Find all matches in this line
101            for mat in pattern.find_iter(content) {
102                // Skip if inside inline code (col is 1-indexed)
103                if ctx.is_in_code_span(line.line_num, mat.start() + 1) {
104                    continue;
105                }
106
107                // Check word boundaries
108                if !Self::is_word_boundary(content, mat.start(), mat.end()) {
109                    continue;
110                }
111
112                let matched_term = &content[mat.start()..mat.end()];
113                let display_term = if self.config.case_sensitive {
114                    matched_term.to_string()
115                } else {
116                    matched_term.to_uppercase()
117                };
118
119                warnings.push(LintWarning {
120                    rule_name: Some(self.name().to_string()),
121                    severity: Severity::Warning,
122                    message: format!("Found forbidden term '{display_term}'"),
123                    line: line.line_num,
124                    column: mat.start() + 1,
125                    end_line: line.line_num,
126                    end_column: mat.end() + 1,
127                    fix: None, // No auto-fix for warning comments
128                });
129            }
130        }
131
132        Ok(warnings)
133    }
134
135    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
136        // No auto-fix for this rule - return content unchanged
137        Ok(ctx.content.to_string())
138    }
139
140    fn as_any(&self) -> &dyn std::any::Any {
141        self
142    }
143
144    fn should_skip(&self, _ctx: &crate::lint_context::LintContext) -> bool {
145        // Skip if no terms configured
146        self.config.terms.is_empty()
147    }
148
149    fn default_config_section(&self) -> Option<(String, toml::Value)> {
150        let default_config = MD061Config::default();
151        let json_value = serde_json::to_value(&default_config).ok()?;
152        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
153
154        if let toml::Value::Table(table) = toml_value {
155            if !table.is_empty() {
156                Some((MD061Config::RULE_NAME.to_string(), toml::Value::Table(table)))
157            } else {
158                None
159            }
160        } else {
161            None
162        }
163    }
164
165    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
166    where
167        Self: Sized,
168    {
169        let rule_config = crate::rule_config_serde::load_rule_config::<MD061Config>(config);
170        Box::new(Self::from_config_struct(rule_config))
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Builds a rule forbidding `terms` with the given case sensitivity.
    fn rule_for(terms: &[&str], case_sensitive: bool) -> MD061ForbiddenTerms {
        let terms = terms.iter().copied().map(String::from).collect();
        MD061ForbiddenTerms::new(terms, case_sensitive)
    }

    /// Case-insensitive rule forbidding only "TODO" — the setup most tests share.
    fn todo_rule() -> MD061ForbiddenTerms {
        rule_for(&["TODO"], false)
    }

    /// Builds the rule the way a user config `[MD061] terms = [...]` would,
    /// leaving every other option at its default.
    fn rule_from_toml_terms(terms: &[&str]) -> Box<dyn Rule> {
        let values = terms
            .iter()
            .map(|term| toml::Value::String(term.to_string()))
            .collect();

        let mut rule_config = crate::config::RuleConfig::default();
        rule_config
            .values
            .insert("terms".to_string(), toml::Value::Array(values));

        let mut config = crate::config::Config::default();
        config.rules.insert("MD061".to_string(), rule_config);

        MD061ForbiddenTerms::from_config(&config)
    }

    /// Runs `rule` over `content` as standard-flavor Markdown and returns its warnings.
    fn lint(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_empty_config_no_warnings() {
        let rule = MD061ForbiddenTerms::default();
        let warnings = lint(&rule, "# TODO: This should not trigger\n\nFIXME: This too\n");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_configured_terms_detected() {
        let rule = rule_for(&["TODO", "FIXME"], false);
        let warnings = lint(&rule, "# Heading\n\nTODO: Implement this\n\nFIXME: Fix this bug\n");
        assert_eq!(warnings.len(), 2);
        // Warnings come back in document order: TODO first, then FIXME
        for (warning, term) in warnings.iter().zip(["TODO", "FIXME"]) {
            assert!(warning.message.contains("forbidden term"));
            assert!(warning.message.contains(term));
        }
    }

    #[test]
    fn test_case_sensitive_by_default() {
        // The default config is case-sensitive, so only the exact spelling "TODO" is found
        let rule = MD061ForbiddenTerms::from_config_struct(MD061Config {
            terms: vec!["TODO".to_string()],
            ..Default::default()
        });
        let warnings = lint(&rule, "todo: lowercase\nTODO: uppercase\nTodo: mixed\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2); // Only "TODO" on line 2 matches
    }

    #[test]
    fn test_case_insensitive_opt_in() {
        let warnings = lint(&todo_rule(), "todo: lowercase\nTODO: uppercase\nTodo: mixed\n");
        assert_eq!(warnings.len(), 3);
    }

    #[test]
    fn test_case_sensitive_mode() {
        let rule = rule_for(&["TODO"], true);
        let warnings = lint(&rule, "todo: lowercase\nTODO: uppercase\nTodo: mixed\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_word_boundary_no_false_positive() {
        let warnings = lint(&todo_rule(), "TODOMORROW is not a match\nTODO is a match\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_word_boundary_with_punctuation() {
        let warnings = lint(&todo_rule(), "TODO: colon\nTODO. period\n(TODO) parens\n");
        assert_eq!(warnings.len(), 3);
    }

    #[test]
    fn test_skip_fenced_code_block() {
        let warnings = lint(
            &todo_rule(),
            "# Heading\n\n```\nTODO: in code block\n```\n\nTODO: outside\n",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 7);
    }

    #[test]
    fn test_skip_indented_code_block() {
        let warnings = lint(
            &todo_rule(),
            "# Heading\n\n    TODO: in indented code\n\nTODO: outside\n",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5);
    }

    #[test]
    fn test_skip_inline_code() {
        let warnings = lint(&todo_rule(), "Here is `TODO` in inline code\nTODO: outside inline\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_skip_frontmatter() {
        let warnings = lint(
            &todo_rule(),
            "---\ntitle: TODO in frontmatter\n---\n\nTODO: outside\n",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 5);
    }

    #[test]
    fn test_multiple_terms_on_same_line() {
        let rule = rule_for(&["TODO", "FIXME"], false);
        let warnings = lint(&rule, "TODO: first thing FIXME: second thing\n");
        assert_eq!(warnings.len(), 2);
    }

    #[test]
    fn test_term_at_start_of_line() {
        let warnings = lint(&todo_rule(), "TODO at start\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].column, 1);
    }

    #[test]
    fn test_term_at_end_of_line() {
        let warnings = lint(&todo_rule(), "something TODO\n");
        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_custom_terms() {
        let rule = rule_for(&["HACK", "XXX"], false);
        let warnings = lint(&rule, "HACK: workaround\nXXX: needs review\nTODO: not configured\n");
        assert_eq!(warnings.len(), 2);
    }

    #[test]
    fn test_no_fix_available() {
        let warnings = lint(&todo_rule(), "TODO: something\n");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_none());
    }

    #[test]
    fn test_column_positions() {
        // Two spaces of indentation, not four: four would turn the line into a code block
        let warnings = lint(&todo_rule(), "  TODO: indented\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].column, 3); // 1-based column, TODO starts at col 3
        assert_eq!(warnings[0].end_column, 7);
    }

    #[test]
    fn test_config_from_toml() {
        let rule = rule_from_toml_terms(&["FIXME"]);
        let warnings = lint(&*rule, "FIXME: configured\nTODO: not configured\n");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains("forbidden term"));
        assert!(warnings[0].message.contains("FIXME"));
    }

    #[test]
    fn test_config_from_toml_case_sensitive_by_default() {
        // Simulates user config: [MD061] terms = ["TODO"]
        // With case_sensitive left unset, matching should default to case-sensitive
        let rule = rule_from_toml_terms(&["TODO"]);
        let warnings = lint(&*rule, "todo: lowercase\nTODO: uppercase\nTodo: mixed\n");

        // Should only match "TODO" (uppercase), not "todo" or "Todo"
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_skip_html_comment() {
        let warnings = lint(&todo_rule(), "<!-- TODO: in html comment -->\nTODO: outside\n");
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_skip_double_backtick_inline_code() {
        let warnings = lint(
            &todo_rule(),
            "Here is ``TODO`` in double backticks\nTODO: outside\n",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_skip_triple_backtick_inline_code() {
        let warnings = lint(
            &todo_rule(),
            "Here is ```TODO``` in triple backticks\nTODO: outside\n",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }

    #[test]
    fn test_inline_code_with_backtick_content() {
        // The code span itself contains backticks: `` `TODO` ``
        let warnings = lint(
            &todo_rule(),
            "Use `` `TODO` `` to show a backtick\nTODO: outside\n",
        );
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 2);
    }
}