Skip to main content

rumdl_lib/rules/md061_forbidden_terms/
mod.rs

1use crate::filtered_lines::FilteredLinesExt;
2use regex::{Regex, RegexBuilder};
3
4use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rule_config_serde::RuleConfig;
6
7mod md061_config;
8pub use md061_config::MD061Config;
9
10/// Rule MD061: Forbidden terms
11///
12/// See [docs/md061.md](../../docs/md061.md) for full documentation, configuration, and examples.
13
14#[derive(Debug, Clone, Default)]
15pub struct MD061ForbiddenTerms {
16    config: MD061Config,
17    pattern: Option<Regex>,
18}
19
20impl MD061ForbiddenTerms {
21    pub fn new(terms: Vec<String>, case_sensitive: bool) -> Self {
22        let config = MD061Config { terms, case_sensitive };
23        let pattern = Self::build_pattern(&config);
24        Self { config, pattern }
25    }
26
27    pub fn from_config_struct(config: MD061Config) -> Self {
28        let pattern = Self::build_pattern(&config);
29        Self { config, pattern }
30    }
31
32    fn build_pattern(config: &MD061Config) -> Option<Regex> {
33        if config.terms.is_empty() {
34            return None;
35        }
36
37        // Build alternation pattern from terms, escaping regex metacharacters
38        let escaped_terms: Vec<String> = config.terms.iter().map(|term| regex::escape(term)).collect();
39        let pattern_str = escaped_terms.join("|");
40
41        RegexBuilder::new(&pattern_str)
42            .case_insensitive(!config.case_sensitive)
43            .build()
44            .ok()
45    }
46
47    /// Check if match is at a word boundary
48    fn is_word_boundary(content: &str, start: usize, end: usize) -> bool {
49        let before_ok = if start == 0 {
50            true
51        } else {
52            content[..start]
53                .chars()
54                .last()
55                .map(|c| !c.is_alphanumeric() && c != '_')
56                .unwrap_or(true)
57        };
58
59        let after_ok = if end >= content.len() {
60            true
61        } else {
62            content[end..]
63                .chars()
64                .next()
65                .map(|c| !c.is_alphanumeric() && c != '_')
66                .unwrap_or(true)
67        };
68
69        before_ok && after_ok
70    }
71}
72
73impl Rule for MD061ForbiddenTerms {
74    fn name(&self) -> &'static str {
75        "MD061"
76    }
77
78    fn description(&self) -> &'static str {
79        "Forbidden terms"
80    }
81
82    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
83        // Early return if no terms configured
84        let pattern = match &self.pattern {
85            Some(p) => p,
86            None => return Ok(Vec::new()),
87        };
88
89        let mut warnings = Vec::new();
90
91        // Use filtered_lines to skip frontmatter, code blocks, HTML comments, and Obsidian comments
92        for line in ctx
93            .filtered_lines()
94            .skip_front_matter()
95            .skip_code_blocks()
96            .skip_html_comments()
97            .skip_obsidian_comments()
98        {
99            let content = line.content;
100
101            // Find all matches in this line
102            for mat in pattern.find_iter(content) {
103                // Skip if inside inline code (col is 1-indexed)
104                if ctx.is_in_code_span(line.line_num, mat.start() + 1) {
105                    continue;
106                }
107
108                // Check word boundaries
109                if !Self::is_word_boundary(content, mat.start(), mat.end()) {
110                    continue;
111                }
112
113                let matched_term = &content[mat.start()..mat.end()];
114                let display_term = if self.config.case_sensitive {
115                    matched_term.to_string()
116                } else {
117                    matched_term.to_uppercase()
118                };
119
120                warnings.push(LintWarning {
121                    rule_name: Some(self.name().to_string()),
122                    severity: Severity::Warning,
123                    message: format!("Found forbidden term '{display_term}'"),
124                    line: line.line_num,
125                    column: mat.start() + 1,
126                    end_line: line.line_num,
127                    end_column: mat.end() + 1,
128                    fix: None, // No auto-fix for warning comments
129                });
130            }
131        }
132
133        Ok(warnings)
134    }
135
136    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
137        // No auto-fix for this rule - return content unchanged
138        Ok(ctx.content.to_string())
139    }
140
141    fn as_any(&self) -> &dyn std::any::Any {
142        self
143    }
144
145    fn should_skip(&self, _ctx: &crate::lint_context::LintContext) -> bool {
146        // Skip if no terms configured
147        self.config.terms.is_empty()
148    }
149
150    fn default_config_section(&self) -> Option<(String, toml::Value)> {
151        let default_config = MD061Config::default();
152        let json_value = serde_json::to_value(&default_config).ok()?;
153        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
154
155        if let toml::Value::Table(table) = toml_value {
156            if !table.is_empty() {
157                Some((MD061Config::RULE_NAME.to_string(), toml::Value::Table(table)))
158            } else {
159                None
160            }
161        } else {
162            None
163        }
164    }
165
166    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
167    where
168        Self: Sized,
169    {
170        let rule_config = crate::rule_config_serde::load_rule_config::<MD061Config>(config);
171        Box::new(Self::from_config_struct(rule_config))
172    }
173}
174
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Three spellings of the same term, one per line; only line 2 is upper-case.
    const MIXED_CASE: &str = "todo: lowercase\nTODO: uppercase\nTodo: mixed\n";

    /// Lint `content` as standard-flavor Markdown with `rule` and return its warnings.
    fn lint(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    /// Build a case-insensitive rule for the given terms.
    fn insensitive(terms: &[&str]) -> MD061ForbiddenTerms {
        MD061ForbiddenTerms::new(terms.iter().copied().map(String::from).collect(), false)
    }

    /// Build the rule from a global config whose `[MD061]` section only sets `terms`.
    fn rule_from_toml_terms(terms: &[&str]) -> Box<dyn Rule> {
        let mut rule_config = crate::config::RuleConfig::default();
        rule_config.values.insert(
            "terms".to_string(),
            toml::Value::Array(
                terms
                    .iter()
                    .map(|term| toml::Value::String((*term).to_string()))
                    .collect(),
            ),
        );

        let mut config = crate::config::Config::default();
        config.rules.insert("MD061".to_string(), rule_config);

        MD061ForbiddenTerms::from_config(&config)
    }

    #[test]
    fn test_empty_config_no_warnings() {
        let rule = MD061ForbiddenTerms::default();
        let result = lint(&rule, "# TODO: This should not trigger\n\nFIXME: This too\n");
        assert!(result.is_empty());
    }

    #[test]
    fn test_configured_terms_detected() {
        let rule = insensitive(&["TODO", "FIXME"]);
        let result = lint(&rule, "# Heading\n\nTODO: Implement this\n\nFIXME: Fix this bug\n");
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("forbidden term"));
        assert!(result[0].message.contains("TODO"));
        assert!(result[1].message.contains("forbidden term"));
        assert!(result[1].message.contains("FIXME"));
    }

    #[test]
    fn test_case_sensitive_by_default() {
        // Default is case-sensitive, so only exact match "TODO" is found
        let config = MD061Config {
            terms: vec!["TODO".to_string()],
            ..Default::default()
        };
        let rule = MD061ForbiddenTerms::from_config_struct(config);
        let result = lint(&rule, MIXED_CASE);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2); // Only "TODO" on line 2 matches
    }

    #[test]
    fn test_case_insensitive_opt_in() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, MIXED_CASE);
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_case_sensitive_mode() {
        let rule = MD061ForbiddenTerms::new(vec!["TODO".to_string()], true);
        let result = lint(&rule, MIXED_CASE);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_word_boundary_no_false_positive() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "TODOMORROW is not a match\nTODO is a match\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_word_boundary_with_punctuation() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "TODO: colon\nTODO. period\n(TODO) parens\n");
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_skip_fenced_code_block() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "# Heading\n\n```\nTODO: in code block\n```\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 7);
    }

    #[test]
    fn test_skip_indented_code_block() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "# Heading\n\n    TODO: in indented code\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_skip_inline_code() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "Here is `TODO` in inline code\nTODO: outside inline\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_frontmatter() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "---\ntitle: TODO in frontmatter\n---\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_multiple_terms_on_same_line() {
        let rule = insensitive(&["TODO", "FIXME"]);
        let result = lint(&rule, "TODO: first thing FIXME: second thing\n");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_term_at_start_of_line() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "TODO at start\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 1);
    }

    #[test]
    fn test_term_at_end_of_line() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "something TODO\n");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_custom_terms() {
        let rule = insensitive(&["HACK", "XXX"]);
        let result = lint(&rule, "HACK: workaround\nXXX: needs review\nTODO: not configured\n");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_no_fix_available() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "TODO: something\n");
        assert_eq!(result.len(), 1);
        assert!(result[0].fix.is_none());
    }

    #[test]
    fn test_column_positions() {
        let rule = insensitive(&["TODO"]);
        // Use 2 spaces, not 4 (4 spaces creates a code block)
        let result = lint(&rule, "  TODO: indented\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 3); // 1-based column, TODO starts at col 3
        assert_eq!(result[0].end_column, 7);
    }

    #[test]
    fn test_config_from_toml() {
        let rule = rule_from_toml_terms(&["FIXME"]);
        let result = lint(&*rule, "FIXME: configured\nTODO: not configured\n");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("forbidden term"));
        assert!(result[0].message.contains("FIXME"));
    }

    #[test]
    fn test_config_from_toml_case_sensitive_by_default() {
        // Simulates user config: [MD061] terms = ["TODO"]
        // Without explicitly setting case_sensitive, should default to true
        let rule = rule_from_toml_terms(&["TODO"]);
        let result = lint(&*rule, MIXED_CASE);

        // Should only match "TODO" (uppercase), not "todo" or "Todo"
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_html_comment() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "<!-- TODO: in html comment -->\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_double_backtick_inline_code() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "Here is ``TODO`` in double backticks\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_triple_backtick_inline_code() {
        let rule = insensitive(&["TODO"]);
        let result = lint(&rule, "Here is ```TODO``` in triple backticks\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_inline_code_with_backtick_content() {
        let rule = insensitive(&["TODO"]);
        // Content with a backtick inside: `` `TODO` ``
        let result = lint(&rule, "Use `` `TODO` `` to show a backtick\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }
}
436}