Skip to main content

rumdl_lib/rules/md061_forbidden_terms/
mod.rs

1use crate::filtered_lines::FilteredLinesExt;
2use regex::{Regex, RegexBuilder};
3
4use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rule_config_serde::RuleConfig;
6
7mod md061_config;
8pub use md061_config::MD061Config;
9
10/// Rule MD061: Forbidden terms
11///
12/// See [docs/md061.md](../../docs/md061.md) for full documentation, configuration, and examples.
13
14#[derive(Debug, Clone, Default)]
15pub struct MD061ForbiddenTerms {
16    config: MD061Config,
17    pattern: Option<Regex>,
18}
19
20impl MD061ForbiddenTerms {
21    pub fn new(terms: Vec<String>, case_sensitive: bool) -> Self {
22        let config = MD061Config { terms, case_sensitive };
23        let pattern = Self::build_pattern(&config);
24        Self { config, pattern }
25    }
26
27    pub fn from_config_struct(config: MD061Config) -> Self {
28        let pattern = Self::build_pattern(&config);
29        Self { config, pattern }
30    }
31
32    fn build_pattern(config: &MD061Config) -> Option<Regex> {
33        if config.terms.is_empty() {
34            return None;
35        }
36
37        // Build alternation pattern from terms, escaping regex metacharacters
38        let escaped_terms: Vec<String> = config.terms.iter().map(|term| regex::escape(term)).collect();
39        let pattern_str = escaped_terms.join("|");
40
41        RegexBuilder::new(&pattern_str)
42            .case_insensitive(!config.case_sensitive)
43            .build()
44            .ok()
45    }
46
47    /// Check if match is at a word boundary
48    fn is_word_boundary(content: &str, start: usize, end: usize) -> bool {
49        let before_ok = if start == 0 {
50            true
51        } else {
52            content[..start]
53                .chars()
54                .last()
55                .map(|c| !c.is_alphanumeric() && c != '_')
56                .unwrap_or(true)
57        };
58
59        let after_ok = if end >= content.len() {
60            true
61        } else {
62            content[end..]
63                .chars()
64                .next()
65                .map(|c| !c.is_alphanumeric() && c != '_')
66                .unwrap_or(true)
67        };
68
69        before_ok && after_ok
70    }
71}
72
73impl Rule for MD061ForbiddenTerms {
74    fn name(&self) -> &'static str {
75        "MD061"
76    }
77
78    fn description(&self) -> &'static str {
79        "Forbidden terms"
80    }
81
82    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
83        // Early return if no terms configured
84        let pattern = match &self.pattern {
85            Some(p) => p,
86            None => return Ok(Vec::new()),
87        };
88
89        let mut warnings = Vec::new();
90
91        // Use filtered_lines to skip frontmatter, code blocks, HTML comments, and Obsidian comments
92        for line in ctx
93            .filtered_lines()
94            .skip_front_matter()
95            .skip_code_blocks()
96            .skip_html_comments()
97            .skip_jsx_expressions()
98            .skip_mdx_comments()
99            .skip_obsidian_comments()
100        {
101            let content = line.content;
102
103            // Find all matches in this line
104            for mat in pattern.find_iter(content) {
105                // Skip if inside inline code (col is 1-indexed)
106                if ctx.is_in_code_span(line.line_num, mat.start() + 1) {
107                    continue;
108                }
109
110                // Check word boundaries
111                if !Self::is_word_boundary(content, mat.start(), mat.end()) {
112                    continue;
113                }
114
115                let matched_term = &content[mat.start()..mat.end()];
116                let display_term = if self.config.case_sensitive {
117                    matched_term.to_string()
118                } else {
119                    matched_term.to_uppercase()
120                };
121
122                warnings.push(LintWarning {
123                    rule_name: Some(self.name().to_string()),
124                    severity: Severity::Warning,
125                    message: format!("Found forbidden term '{display_term}'"),
126                    line: line.line_num,
127                    column: mat.start() + 1,
128                    end_line: line.line_num,
129                    end_column: mat.end() + 1,
130                    fix: None, // No auto-fix for warning comments
131                });
132            }
133        }
134
135        Ok(warnings)
136    }
137
138    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
139        // No auto-fix for this rule - return content unchanged
140        Ok(ctx.content.to_string())
141    }
142
143    fn as_any(&self) -> &dyn std::any::Any {
144        self
145    }
146
147    fn should_skip(&self, _ctx: &crate::lint_context::LintContext) -> bool {
148        // Skip if no terms configured
149        self.config.terms.is_empty()
150    }
151
152    fn default_config_section(&self) -> Option<(String, toml::Value)> {
153        let default_config = MD061Config::default();
154        let json_value = serde_json::to_value(&default_config).ok()?;
155        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
156
157        if let toml::Value::Table(table) = toml_value {
158            if !table.is_empty() {
159                Some((MD061Config::RULE_NAME.to_string(), toml::Value::Table(table)))
160            } else {
161                None
162            }
163        } else {
164            None
165        }
166    }
167
168    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
169    where
170        Self: Sized,
171    {
172        let rule_config = crate::rule_config_serde::load_rule_config::<MD061Config>(config);
173        Box::new(Self::from_config_struct(rule_config))
174    }
175}
176
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Builds a rule from string literals.
    fn rule_with(terms: &[&str], case_sensitive: bool) -> MD061ForbiddenTerms {
        let terms = terms.iter().map(|term| (*term).to_string()).collect();
        MD061ForbiddenTerms::new(terms, case_sensitive)
    }

    /// Case-insensitive rule that forbids only "TODO"; used by most tests.
    fn todo_rule() -> MD061ForbiddenTerms {
        rule_with(&["TODO"], false)
    }

    /// Lints `content` as standard-flavor Markdown and returns the warnings.
    fn warnings_for(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    /// Builds a global config whose `[MD061]` section sets only `terms`.
    fn config_with_terms(terms: &[&str]) -> crate::config::Config {
        let mut rule_config = crate::config::RuleConfig::default();
        rule_config.values.insert(
            "terms".to_string(),
            toml::Value::Array(
                terms
                    .iter()
                    .map(|term| toml::Value::String((*term).to_string()))
                    .collect(),
            ),
        );

        let mut config = crate::config::Config::default();
        config.rules.insert("MD061".to_string(), rule_config);
        config
    }

    #[test]
    fn test_empty_config_no_warnings() {
        let rule = MD061ForbiddenTerms::default();
        let result = warnings_for(&rule, "# TODO: This should not trigger\n\nFIXME: This too\n");
        assert!(result.is_empty());
    }

    #[test]
    fn test_configured_terms_detected() {
        let rule = rule_with(&["TODO", "FIXME"], false);
        let result = warnings_for(&rule, "# Heading\n\nTODO: Implement this\n\nFIXME: Fix this bug\n");
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("forbidden term"));
        assert!(result[0].message.contains("TODO"));
        assert!(result[1].message.contains("forbidden term"));
        assert!(result[1].message.contains("FIXME"));
    }

    #[test]
    fn test_case_sensitive_by_default() {
        // Default is case-sensitive, so only exact match "TODO" is found
        let config = MD061Config {
            terms: vec!["TODO".to_string()],
            ..Default::default()
        };
        let rule = MD061ForbiddenTerms::from_config_struct(config);
        let result = warnings_for(&rule, "todo: lowercase\nTODO: uppercase\nTodo: mixed\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2); // Only "TODO" on line 2 matches
    }

    #[test]
    fn test_case_insensitive_opt_in() {
        let result = warnings_for(&todo_rule(), "todo: lowercase\nTODO: uppercase\nTodo: mixed\n");
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_case_sensitive_mode() {
        let rule = rule_with(&["TODO"], true);
        let result = warnings_for(&rule, "todo: lowercase\nTODO: uppercase\nTodo: mixed\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_word_boundary_no_false_positive() {
        let result = warnings_for(&todo_rule(), "TODOMORROW is not a match\nTODO is a match\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_word_boundary_with_punctuation() {
        let result = warnings_for(&todo_rule(), "TODO: colon\nTODO. period\n(TODO) parens\n");
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_skip_fenced_code_block() {
        let content = "# Heading\n\n```\nTODO: in code block\n```\n\nTODO: outside\n";
        let result = warnings_for(&todo_rule(), content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 7);
    }

    #[test]
    fn test_skip_indented_code_block() {
        let content = "# Heading\n\n    TODO: in indented code\n\nTODO: outside\n";
        let result = warnings_for(&todo_rule(), content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_skip_inline_code() {
        let content = "Here is `TODO` in inline code\nTODO: outside inline\n";
        let result = warnings_for(&todo_rule(), content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_frontmatter() {
        let content = "---\ntitle: TODO in frontmatter\n---\n\nTODO: outside\n";
        let result = warnings_for(&todo_rule(), content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_multiple_terms_on_same_line() {
        let rule = rule_with(&["TODO", "FIXME"], false);
        let result = warnings_for(&rule, "TODO: first thing FIXME: second thing\n");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_term_at_start_of_line() {
        let result = warnings_for(&todo_rule(), "TODO at start\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 1);
    }

    #[test]
    fn test_term_at_end_of_line() {
        let result = warnings_for(&todo_rule(), "something TODO\n");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_custom_terms() {
        let rule = rule_with(&["HACK", "XXX"], false);
        let result = warnings_for(&rule, "HACK: workaround\nXXX: needs review\nTODO: not configured\n");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_no_fix_available() {
        let result = warnings_for(&todo_rule(), "TODO: something\n");
        assert_eq!(result.len(), 1);
        assert!(result[0].fix.is_none());
    }

    #[test]
    fn test_column_positions() {
        // Use 2 spaces, not 4 (4 spaces creates a code block)
        let result = warnings_for(&todo_rule(), "  TODO: indented\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 3); // 1-based column, TODO starts at col 3
        assert_eq!(result[0].end_column, 7);
    }

    #[test]
    fn test_config_from_toml() {
        let config = config_with_terms(&["FIXME"]);
        let rule = MD061ForbiddenTerms::from_config(&config);
        let result = warnings_for(&*rule, "FIXME: configured\nTODO: not configured\n");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("forbidden term"));
        assert!(result[0].message.contains("FIXME"));
    }

    #[test]
    fn test_config_from_toml_case_sensitive_by_default() {
        // Simulates user config: [MD061] terms = ["TODO"]
        // Without explicitly setting case_sensitive, should default to true
        let config = config_with_terms(&["TODO"]);
        let rule = MD061ForbiddenTerms::from_config(&config);
        let result = warnings_for(&*rule, "todo: lowercase\nTODO: uppercase\nTodo: mixed\n");

        // Should only match "TODO" (uppercase), not "todo" or "Todo"
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_html_comment() {
        let content = "<!-- TODO: in html comment -->\nTODO: outside\n";
        let result = warnings_for(&todo_rule(), content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_double_backtick_inline_code() {
        let content = "Here is ``TODO`` in double backticks\nTODO: outside\n";
        let result = warnings_for(&todo_rule(), content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_triple_backtick_inline_code() {
        let content = "Here is ```TODO``` in triple backticks\nTODO: outside\n";
        let result = warnings_for(&todo_rule(), content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_inline_code_with_backtick_content() {
        // Content with a backtick inside: `` `TODO` ``
        let content = "Use `` `TODO` `` to show a backtick\nTODO: outside\n";
        let result = warnings_for(&todo_rule(), content);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }
}