quickmark_core/rules/
md010.rs

1use serde::Deserialize;
2use std::collections::HashSet;
3use std::rc::Rc;
4use tree_sitter::Node;
5
6use crate::{
7    linter::{range_from_tree_sitter, RuleViolation},
8    rules::{Context, Rule, RuleLinter, RuleType},
9};
10
11// MD010-specific configuration types
12#[derive(Debug, PartialEq, Clone, Deserialize)]
13pub struct MD010HardTabsTable {
14    #[serde(default)]
15    pub code_blocks: bool,
16    #[serde(default)]
17    pub ignore_code_languages: Vec<String>,
18    #[serde(default)]
19    pub spaces_per_tab: usize,
20}
21
22impl Default for MD010HardTabsTable {
23    fn default() -> Self {
24        Self {
25            code_blocks: true,
26            ignore_code_languages: Vec::new(),
27            spaces_per_tab: 1,
28        }
29    }
30}
31
32/// MD010 Hard Tabs Rule Linter
33///
34/// **SINGLE-USE CONTRACT**: This linter is designed for one-time use only.
35/// After processing a document (via feed() calls and finalize()), the linter
36/// should be discarded. The violations state is not cleared between uses.
37pub(crate) struct MD010Linter {
38    context: Rc<Context>,
39    violations: Vec<RuleViolation>,
40}
41
42impl MD010Linter {
43    pub fn new(context: Rc<Context>) -> Self {
44        Self {
45            context,
46            violations: Vec::new(),
47        }
48    }
49
50    /// Analyze all lines and store all violations for reporting via finalize().
51    /// Context cache is already initialized by MultiRuleLinter.
52    fn analyze_all_lines(&mut self) {
53        let settings = &self.context.config.linters.settings.hard_tabs;
54        let lines = self.context.lines.borrow();
55
56        // Determine which lines to exclude from hard tab checks.
57        // If `code_blocks` is true (default), we check tabs in code blocks,
58        // but may exclude specific languages via `ignore_code_languages`.
59        // If `code_blocks` is false, we exclude all code blocks entirely.
60        let excluded_lines = if settings.code_blocks {
61            self.get_ignored_language_code_block_lines(settings)
62        } else {
63            self.get_all_code_block_lines()
64        };
65
66        for (line_index, line) in lines.iter().enumerate() {
67            let line_number = line_index + 1;
68
69            if excluded_lines.contains(&line_number) {
70                continue;
71            }
72
73            // Find all hard tabs in the line and create violations.
74            for (char_index, ch) in line.char_indices() {
75                if ch == '\t' {
76                    let violation =
77                        self.create_violation(line_index, char_index, settings.spaces_per_tab);
78                    self.violations.push(violation);
79                }
80            }
81        }
82    }
83
84    /// Returns a set of line numbers from fenced code blocks where the language
85    /// is in the user's ignore list (e.g., `ignore_code_languages = ["python"]`).
86    fn get_ignored_language_code_block_lines(
87        &self,
88        settings: &crate::config::MD010HardTabsTable,
89    ) -> HashSet<usize> {
90        if settings.ignore_code_languages.is_empty() {
91            return HashSet::new();
92        }
93
94        let node_cache = self.context.node_cache.borrow();
95        let mut excluded_lines = HashSet::new();
96
97        if let Some(fenced_code_blocks) = node_cache.get("fenced_code_block") {
98            let lines = self.context.lines.borrow();
99            for node_info in fenced_code_blocks {
100                if let Some(first_line) = lines.get(node_info.line_start) {
101                    if let Some(language) = self.extract_code_block_language(first_line) {
102                        if settings.ignore_code_languages.contains(&language) {
103                            for line_num in (node_info.line_start + 1)..=(node_info.line_end + 1) {
104                                excluded_lines.insert(line_num);
105                            }
106                        }
107                    }
108                }
109            }
110        }
111
112        excluded_lines
113    }
114
115    /// Returns a set of all line numbers that are part of any code block.
116    fn get_all_code_block_lines(&self) -> HashSet<usize> {
117        let node_cache = self.context.node_cache.borrow();
118        ["indented_code_block", "fenced_code_block"]
119            .iter()
120            .filter_map(|kind| node_cache.get(*kind))
121            .flatten()
122            .flat_map(|node_info| (node_info.line_start + 1)..=(node_info.line_end + 1))
123            .collect()
124    }
125
126    /// Extracts the language identifier from a fenced code block's info string.
127    /// This handles common variations like attributes (e.g., ```rust{{...}}).
128    fn extract_code_block_language(&self, line: &str) -> Option<String> {
129        let trimmed = line.trim_start();
130        if !trimmed.starts_with("```") && !trimmed.starts_with("~~~") {
131            return None;
132        }
133
134        let language_part = &trimmed[3..];
135        language_part
136            .split_whitespace()
137            .next()
138            // Handle language specifiers with attributes like ```rust{{...}}
139            .map(|s| s.split('{').next().unwrap_or(s))
140            .filter(|s| !s.is_empty())
141            .map(|s| s.to_lowercase())
142    }
143
144    /// Creates a RuleViolation for a hard tab at the specified position.
145    fn create_violation(
146        &self,
147        line_index: usize,
148        tab_position: usize,
149        spaces_per_tab: usize,
150    ) -> RuleViolation {
151        let message = if spaces_per_tab == 1 {
152            "Hard tabs".to_string()
153        } else {
154            format!("Hard tabs (replace with {spaces_per_tab} spaces)")
155        };
156
157        RuleViolation::new(
158            &MD010,
159            message,
160            self.context.file_path.clone(),
161            range_from_tree_sitter(&tree_sitter::Range {
162                // FIXME: Byte offsets are not correctly calculated as line start offset is unavailable here.
163                // This may result in incorrect highlighting in some tools.
164                // The primary information is in the points (row/column).
165                start_byte: 0,
166                end_byte: 0,
167                start_point: tree_sitter::Point {
168                    row: line_index,
169                    column: tab_position,
170                },
171                end_point: tree_sitter::Point {
172                    row: line_index,
173                    column: tab_position + 1,
174                },
175            }),
176        )
177    }
178}
179
180impl RuleLinter for MD010Linter {
181    fn feed(&mut self, node: &Node) {
182        // This rule is line-based and only needs to run once.
183        // We trigger the analysis on seeing the top-level `document` node.
184        if node.kind() == "document" {
185            self.analyze_all_lines();
186        }
187    }
188
189    fn finalize(&mut self) -> Vec<RuleViolation> {
190        std::mem::take(&mut self.violations)
191    }
192}
193
194pub const MD010: Rule = Rule {
195    id: "MD010",
196    alias: "no-hard-tabs",
197    tags: &["hard_tab", "whitespace"],
198    description: "Hard tabs",
199    rule_type: RuleType::Line,
200    // This is a line-based rule and does not require specific nodes from the AST.
201    // The logic runs once for the entire file content.
202    required_nodes: &[],
203    new_linter: |context| Box::new(MD010Linter::new(context)),
204};
205
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, MD010HardTabsTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::{test_config_with_rules, test_config_with_settings};

    /// Configuration with MD010 reported as an error and the heading rules
    /// switched off, using the default hard-tab settings.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("no-hard-tabs", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
        ])
    }

    /// Same rule selection as `test_config`, but with explicit hard-tab settings.
    fn test_config_with_hard_tabs(
        hard_tabs_config: MD010HardTabsTable,
    ) -> crate::config::QuickmarkConfig {
        let rules = vec![
            ("no-hard-tabs", RuleSeverity::Error),
            ("heading-style", RuleSeverity::Off),
            ("heading-increment", RuleSeverity::Off),
        ];
        let settings = LintersSettingsTable {
            hard_tabs: hard_tabs_config,
            ..Default::default()
        };
        test_config_with_settings(rules, settings)
    }

    /// A single tab in a plain paragraph yields exactly one MD010 violation.
    #[test]
    fn test_basic_hard_tab_violation() {
        let input = "This line has a hard tab:\tafter this";

        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        assert_eq!(1, violations.len());

        let first = &violations[0];
        assert_eq!("MD010", first.rule().id);
        assert!(first.message().contains("Hard tabs"));
    }

    /// Text without tabs produces no violations.
    #[test]
    fn test_no_hard_tabs() {
        let input = "This line has no hard tabs, only spaces.";

        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        assert_eq!(0, violations.len());
    }

    /// Every tab on a line is reported separately (three tabs, three violations).
    #[test]
    fn test_multiple_hard_tabs() {
        let input = "Line with\ttabs\tin\tmultiple places";

        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        assert_eq!(3, violations.len());
    }

    /// With default settings, tabs inside fenced code blocks are still reported.
    #[test]
    fn test_hard_tab_in_code_block_allowed_by_default() {
        let input = "```\nfunction example() {\n\treturn \"tab indented\";\n}\n```";

        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        assert_eq!(1, violations.len());
    }

    /// `code_blocks: false` exempts code block contents from the check.
    #[test]
    fn test_code_blocks_disabled() {
        let hard_tabs = MD010HardTabsTable {
            code_blocks: false,
            ignore_code_languages: Vec::new(),
            spaces_per_tab: 1,
        };
        let config = test_config_with_hard_tabs(hard_tabs);

        let input = "```\nfunction example() {\n\treturn \"tab indented\";\n}\n```";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(0, violations.len());
    }

    /// Fenced blocks whose language is listed in `ignore_code_languages` are skipped.
    #[test]
    fn test_ignore_specific_languages() {
        let hard_tabs = MD010HardTabsTable {
            code_blocks: true,
            ignore_code_languages: vec!["python".to_string()],
            spaces_per_tab: 1,
        };
        let config = test_config_with_hard_tabs(hard_tabs);

        let input = "```python\ndef example():\n\treturn \"tab indented\"\n```";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(0, violations.len());
    }

    /// A non-default `spaces_per_tab` shows up in the violation message.
    #[test]
    fn test_custom_spaces_per_tab() {
        let hard_tabs = MD010HardTabsTable {
            code_blocks: true,
            ignore_code_languages: Vec::new(),
            spaces_per_tab: 4,
        };
        let config = test_config_with_hard_tabs(hard_tabs);

        let input = "Line with\thard tab";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(1, violations.len());

        let first = &violations[0];
        assert!(first.message().contains("4"));
    }

    /// Tabs in indented code blocks are flagged under the default settings.
    #[test]
    fn test_indented_code_block() {
        let input = "    This is indented code with\ttab";

        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        assert_eq!(1, violations.len());
    }

    /// Violations are counted across lines: one tab on line 2 plus three on line 4.
    #[test]
    fn test_multiple_lines_mixed() {
        let input =
            "Line without tabs\nLine with\ttab\nAnother normal line\nAnother\tline\twith\ttabs";

        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        assert_eq!(4, violations.len());
    }
}
353}