Skip to main content

rumdl_lib/rules/md061_forbidden_terms/
mod.rs

1use crate::filtered_lines::FilteredLinesExt;
2use regex::{Regex, RegexBuilder};
3
4use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6
7mod md061_config;
8pub(super) use md061_config::MD061Config;
9
10/// Rule MD061: Forbidden terms
11///
12/// See [docs/md061.md](../../docs/md061.md) for full documentation, configuration, and examples.
13
14#[derive(Debug, Clone, Default)]
15pub struct MD061ForbiddenTerms {
16    config: MD061Config,
17    pattern: Option<Regex>,
18}
19
20impl MD061ForbiddenTerms {
21    pub fn new(terms: Vec<String>, case_sensitive: bool) -> Self {
22        let config = MD061Config { terms, case_sensitive };
23        let pattern = Self::build_pattern(&config);
24        Self { config, pattern }
25    }
26
27    pub fn from_config_struct(config: MD061Config) -> Self {
28        let pattern = Self::build_pattern(&config);
29        Self { config, pattern }
30    }
31
32    fn build_pattern(config: &MD061Config) -> Option<Regex> {
33        if config.terms.is_empty() {
34            return None;
35        }
36
37        // Build alternation pattern from terms, escaping regex metacharacters
38        let escaped_terms: Vec<String> = config.terms.iter().map(|term| regex::escape(term)).collect();
39        let pattern_str = escaped_terms.join("|");
40
41        RegexBuilder::new(&pattern_str)
42            .case_insensitive(!config.case_sensitive)
43            .build()
44            .ok()
45    }
46
47    /// Check if match is at a word boundary
48    fn is_word_boundary(content: &str, start: usize, end: usize) -> bool {
49        let before_ok = if start == 0 {
50            true
51        } else {
52            content[..start]
53                .chars()
54                .last()
55                .is_none_or(|c| !c.is_alphanumeric() && c != '_')
56        };
57
58        let after_ok = if end >= content.len() {
59            true
60        } else {
61            content[end..]
62                .chars()
63                .next()
64                .is_none_or(|c| !c.is_alphanumeric() && c != '_')
65        };
66
67        before_ok && after_ok
68    }
69}
70
71impl Rule for MD061ForbiddenTerms {
72    fn name(&self) -> &'static str {
73        "MD061"
74    }
75
76    fn description(&self) -> &'static str {
77        "Forbidden terms"
78    }
79
80    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
81        // Early return if no terms configured
82        let Some(pattern) = &self.pattern else {
83            return Ok(Vec::new());
84        };
85
86        let mut warnings = Vec::new();
87
88        // Use filtered_lines to skip frontmatter, code blocks, HTML comments, and Obsidian comments
89        for line in ctx
90            .filtered_lines()
91            .skip_front_matter()
92            .skip_code_blocks()
93            .skip_html_comments()
94            .skip_jsx_expressions()
95            .skip_mdx_comments()
96            .skip_obsidian_comments()
97        {
98            let content = line.content;
99
100            // Find all matches in this line
101            for mat in pattern.find_iter(content) {
102                // Skip if inside inline code (col is 1-indexed)
103                if ctx.is_in_code_span(line.line_num, mat.start() + 1) {
104                    continue;
105                }
106
107                // Check word boundaries
108                if !Self::is_word_boundary(content, mat.start(), mat.end()) {
109                    continue;
110                }
111
112                let matched_term = &content[mat.start()..mat.end()];
113                let display_term = if self.config.case_sensitive {
114                    matched_term.to_string()
115                } else {
116                    matched_term.to_uppercase()
117                };
118
119                warnings.push(LintWarning {
120                    rule_name: Some(self.name().to_string()),
121                    severity: Severity::Warning,
122                    message: format!("Found forbidden term '{display_term}'"),
123                    line: line.line_num,
124                    column: mat.start() + 1,
125                    end_line: line.line_num,
126                    end_column: mat.end() + 1,
127                    fix: None, // No auto-fix for warning comments
128                });
129            }
130        }
131
132        Ok(warnings)
133    }
134
135    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
136        Ok(ctx.content.to_string())
137    }
138
139    fn category(&self) -> RuleCategory {
140        RuleCategory::Other
141    }
142
143    fn fix_capability(&self) -> FixCapability {
144        FixCapability::Unfixable
145    }
146
147    fn as_any(&self) -> &dyn std::any::Any {
148        self
149    }
150
151    fn should_skip(&self, _ctx: &crate::lint_context::LintContext) -> bool {
152        // Skip if no terms configured
153        self.config.terms.is_empty()
154    }
155
156    fn default_config_section(&self) -> Option<(String, toml::Value)> {
157        let default_config = MD061Config::default();
158        let json_value = serde_json::to_value(&default_config).ok()?;
159        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
160
161        if let toml::Value::Table(table) = toml_value {
162            if !table.is_empty() {
163                Some((MD061Config::RULE_NAME.to_string(), toml::Value::Table(table)))
164            } else {
165                None
166            }
167        } else {
168            None
169        }
170    }
171
172    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
173    where
174        Self: Sized,
175    {
176        let rule_config = crate::rule_config_serde::load_rule_config::<MD061Config>(config);
177        Box::new(Self::from_config_struct(rule_config))
178    }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Three spellings of "todo", one per line; only line 2 is upper-case.
    const MIXED_CASE_TODOS: &str = "todo: lowercase\nTODO: uppercase\nTodo: mixed\n";

    /// Builds a rule for the given terms.
    fn rule_for(terms: &[&str], case_sensitive: bool) -> MD061ForbiddenTerms {
        let owned = terms.iter().map(|&term| term.to_owned()).collect();
        MD061ForbiddenTerms::new(owned, case_sensitive)
    }

    /// Builds a rule through `from_config`, with only `terms` set under `MD061`.
    fn rule_from_toml_terms(terms: &[&str]) -> Box<dyn Rule> {
        let toml_terms = terms
            .iter()
            .map(|&term| toml::Value::String(term.to_owned()))
            .collect();

        let mut rule_config = crate::config::RuleConfig::default();
        rule_config
            .values
            .insert("terms".to_string(), toml::Value::Array(toml_terms));

        let mut config = crate::config::Config::default();
        config.rules.insert("MD061".to_string(), rule_config);

        MD061ForbiddenTerms::from_config(&config)
    }

    /// Lints `content` as standard-flavor Markdown and returns the warnings.
    fn lint(rule: &dyn Rule, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    #[test]
    fn test_empty_config_no_warnings() {
        let rule = MD061ForbiddenTerms::default();
        let result = lint(&rule, "# TODO: This should not trigger\n\nFIXME: This too\n");
        assert!(result.is_empty());
    }

    #[test]
    fn test_configured_terms_detected() {
        let rule = rule_for(&["TODO", "FIXME"], false);
        let result = lint(&rule, "# Heading\n\nTODO: Implement this\n\nFIXME: Fix this bug\n");
        assert_eq!(result.len(), 2);
        assert!(result[0].message.contains("forbidden term"));
        assert!(result[0].message.contains("TODO"));
        assert!(result[1].message.contains("forbidden term"));
        assert!(result[1].message.contains("FIXME"));
    }

    #[test]
    fn test_case_sensitive_by_default() {
        // The default configuration is case-sensitive, so only the exact "TODO" matches
        let config = MD061Config {
            terms: vec!["TODO".to_string()],
            ..Default::default()
        };
        let rule = MD061ForbiddenTerms::from_config_struct(config);
        let result = lint(&rule, MIXED_CASE_TODOS);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2); // Only "TODO" on line 2 matches
    }

    #[test]
    fn test_case_insensitive_opt_in() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, MIXED_CASE_TODOS);
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_case_sensitive_mode() {
        let rule = rule_for(&["TODO"], true);
        let result = lint(&rule, MIXED_CASE_TODOS);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_word_boundary_no_false_positive() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "TODOMORROW is not a match\nTODO is a match\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_word_boundary_with_punctuation() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "TODO: colon\nTODO. period\n(TODO) parens\n");
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn test_skip_fenced_code_block() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "# Heading\n\n```\nTODO: in code block\n```\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 7);
    }

    #[test]
    fn test_skip_indented_code_block() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "# Heading\n\n    TODO: in indented code\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_skip_inline_code() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "Here is `TODO` in inline code\nTODO: outside inline\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_frontmatter() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "---\ntitle: TODO in frontmatter\n---\n\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_multiple_terms_on_same_line() {
        let rule = rule_for(&["TODO", "FIXME"], false);
        let result = lint(&rule, "TODO: first thing FIXME: second thing\n");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_term_at_start_of_line() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "TODO at start\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 1);
    }

    #[test]
    fn test_term_at_end_of_line() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "something TODO\n");
        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_custom_terms() {
        let rule = rule_for(&["HACK", "XXX"], false);
        let result = lint(&rule, "HACK: workaround\nXXX: needs review\nTODO: not configured\n");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_no_fix_available() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "TODO: something\n");
        assert_eq!(result.len(), 1);
        assert!(result[0].fix.is_none());
    }

    #[test]
    fn test_column_positions() {
        let rule = rule_for(&["TODO"], false);
        // Two spaces of indent, not four: four would start an indented code block
        let result = lint(&rule, "  TODO: indented\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].column, 3); // 1-based column, TODO starts at col 3
        assert_eq!(result[0].end_column, 7);
    }

    #[test]
    fn test_config_from_toml() {
        let rule = rule_from_toml_terms(&["FIXME"]);
        let result = lint(rule.as_ref(), "FIXME: configured\nTODO: not configured\n");
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("forbidden term"));
        assert!(result[0].message.contains("FIXME"));
    }

    #[test]
    fn test_config_from_toml_case_sensitive_by_default() {
        // Simulates user config: [MD061] terms = ["TODO"]
        // With case_sensitive left unset, matching should default to case-sensitive
        let rule = rule_from_toml_terms(&["TODO"]);
        let result = lint(rule.as_ref(), MIXED_CASE_TODOS);

        // Should only match "TODO" (uppercase), not "todo" or "Todo"
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_html_comment() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "<!-- TODO: in html comment -->\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_double_backtick_inline_code() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "Here is ``TODO`` in double backticks\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_skip_triple_backtick_inline_code() {
        let rule = rule_for(&["TODO"], false);
        let result = lint(&rule, "Here is ```TODO``` in triple backticks\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }

    #[test]
    fn test_inline_code_with_backtick_content() {
        let rule = rule_for(&["TODO"], false);
        // Code span whose content itself contains backticks: `` `TODO` ``
        let result = lint(&rule, "Use `` `TODO` `` to show a backtick\nTODO: outside\n");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 2);
    }
}