Skip to main content

rumdl_lib/rules/
md010_no_hard_tabs.rs

1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::rule_config_serde::RuleConfig;
3/// Rule MD010: No tabs
4///
5/// See [docs/md010.md](../../docs/md010.md) for full documentation, configuration, and examples.
6use crate::utils::range_utils::calculate_match_range;
7use crate::utils::regex_cache::{HTML_COMMENT_END, HTML_COMMENT_START};
8
9mod md010_config;
10use md010_config::MD010Config;
11
12// HTML comment patterns are now imported from regex_cache
13
14/// Rule MD010: Hard tabs
15#[derive(Clone, Default)]
16pub struct MD010NoHardTabs {
17    config: MD010Config,
18}
19
20impl MD010NoHardTabs {
21    pub fn new(spaces_per_tab: usize) -> Self {
22        Self {
23            config: MD010Config {
24                spaces_per_tab: crate::types::PositiveUsize::from_const(spaces_per_tab),
25            },
26        }
27    }
28
29    pub const fn from_config_struct(config: MD010Config) -> Self {
30        Self { config }
31    }
32
33    // Identify lines that are part of HTML comments
34    fn find_html_comment_lines(lines: &[&str]) -> Vec<bool> {
35        let mut in_html_comment = false;
36        let mut html_comment_lines = vec![false; lines.len()];
37
38        for (i, line) in lines.iter().enumerate() {
39            // Check if this line has a comment start
40            let has_comment_start = HTML_COMMENT_START.is_match(line);
41            // Check if this line has a comment end
42            let has_comment_end = HTML_COMMENT_END.is_match(line);
43
44            if has_comment_start && !has_comment_end && !in_html_comment {
45                // Comment starts on this line and doesn't end
46                in_html_comment = true;
47                html_comment_lines[i] = true;
48            } else if has_comment_end && in_html_comment {
49                // Comment ends on this line
50                html_comment_lines[i] = true;
51                in_html_comment = false;
52            } else if has_comment_start && has_comment_end {
53                // Both start and end on the same line
54                html_comment_lines[i] = true;
55            } else if in_html_comment {
56                // We're inside a multi-line comment
57                html_comment_lines[i] = true;
58            }
59        }
60
61        html_comment_lines
62    }
63
64    fn count_leading_tabs(line: &str) -> usize {
65        let mut count = 0;
66        for c in line.chars() {
67            if c == '\t' {
68                count += 1;
69            } else {
70                break;
71            }
72        }
73        count
74    }
75
76    fn find_and_group_tabs(line: &str) -> Vec<(usize, usize)> {
77        let mut groups = Vec::new();
78        let mut current_group_start: Option<usize> = None;
79        let mut last_tab_pos = 0;
80
81        for (i, c) in line.chars().enumerate() {
82            if c == '\t' {
83                if let Some(start) = current_group_start {
84                    // We're in a group - check if this tab is consecutive
85                    if i == last_tab_pos + 1 {
86                        // Consecutive tab, continue the group
87                        last_tab_pos = i;
88                    } else {
89                        // Gap found, save current group and start new one
90                        groups.push((start, last_tab_pos + 1));
91                        current_group_start = Some(i);
92                        last_tab_pos = i;
93                    }
94                } else {
95                    // Start a new group
96                    current_group_start = Some(i);
97                    last_tab_pos = i;
98                }
99            }
100        }
101
102        // Add the last group if there is one
103        if let Some(start) = current_group_start {
104            groups.push((start, last_tab_pos + 1));
105        }
106
107        groups
108    }
109
110    /// Find lines that are inside fenced code blocks (``` or ~~~)
111    /// Returns a Vec<bool> where index i indicates if line i is inside a fenced code block
112    fn find_fenced_code_block_lines(lines: &[&str]) -> Vec<bool> {
113        let mut in_fenced_block = false;
114        let mut fence_char: Option<char> = None;
115        let mut result = vec![false; lines.len()];
116
117        for (i, line) in lines.iter().enumerate() {
118            let trimmed = line.trim_start();
119
120            if !in_fenced_block {
121                // Check for opening fence (``` or ~~~)
122                if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
123                    in_fenced_block = true;
124                    fence_char = Some(trimmed.chars().next().unwrap());
125                    result[i] = true; // Mark the fence line itself as "in fenced block"
126                }
127            } else {
128                result[i] = true;
129                // Check for closing fence (must match opening fence char)
130                if let Some(fc) = fence_char {
131                    let fence_str: String = std::iter::repeat_n(fc, 3).collect();
132                    if trimmed.starts_with(&fence_str) && trimmed.trim() == fence_str {
133                        in_fenced_block = false;
134                        fence_char = None;
135                    }
136                }
137            }
138        }
139
140        result
141    }
142}
143
144impl Rule for MD010NoHardTabs {
145    fn name(&self) -> &'static str {
146        "MD010"
147    }
148
149    fn description(&self) -> &'static str {
150        "No tabs"
151    }
152
153    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
154        let content = ctx.content;
155        let _line_index = &ctx.line_index;
156
157        let mut warnings = Vec::new();
158        let lines: Vec<&str> = content.lines().collect();
159
160        // Pre-compute which lines are part of HTML comments
161        let html_comment_lines = Self::find_html_comment_lines(&lines);
162
163        // Pre-compute which lines are inside fenced code blocks (``` or ~~~)
164        // We only skip fenced code blocks - code has its own formatting rules
165        // (e.g., Makefiles require tabs, Go uses tabs by convention)
166        // We still flag tab-indented content because it might be accidental
167        let fenced_code_block_lines = Self::find_fenced_code_block_lines(&lines);
168
169        for (line_num, &line) in lines.iter().enumerate() {
170            // Skip if in HTML comment
171            if html_comment_lines[line_num] {
172                continue;
173            }
174
175            // Skip if in fenced code block - code has its own formatting rules
176            if fenced_code_block_lines[line_num] {
177                continue;
178            }
179
180            // Skip lines inside PyMdown blocks (MkDocs flavor)
181            if ctx.line_info(line_num + 1).is_some_and(|info| info.in_pymdown_block) {
182                continue;
183            }
184
185            // Process tabs directly without intermediate collection
186            let tab_groups = Self::find_and_group_tabs(line);
187            if tab_groups.is_empty() {
188                continue;
189            }
190
191            let leading_tabs = Self::count_leading_tabs(line);
192
193            // Generate warning for each group of consecutive tabs
194            for (start_pos, end_pos) in tab_groups {
195                let tab_count = end_pos - start_pos;
196                let is_leading = start_pos < leading_tabs;
197
198                // Calculate precise character range for the tab group
199                let (start_line, start_col, end_line, end_col) =
200                    calculate_match_range(line_num + 1, line, start_pos, tab_count);
201
202                let message = if line.trim().is_empty() {
203                    if tab_count == 1 {
204                        "Empty line contains tab".to_string()
205                    } else {
206                        format!("Empty line contains {tab_count} tabs")
207                    }
208                } else if is_leading {
209                    if tab_count == 1 {
210                        format!(
211                            "Found leading tab, use {} spaces instead",
212                            self.config.spaces_per_tab.get()
213                        )
214                    } else {
215                        format!(
216                            "Found {} leading tabs, use {} spaces instead",
217                            tab_count,
218                            tab_count * self.config.spaces_per_tab.get()
219                        )
220                    }
221                } else if tab_count == 1 {
222                    "Found tab for alignment, use spaces instead".to_string()
223                } else {
224                    format!("Found {tab_count} tabs for alignment, use spaces instead")
225                };
226
227                warnings.push(LintWarning {
228                    rule_name: Some(self.name().to_string()),
229                    line: start_line,
230                    column: start_col,
231                    end_line,
232                    end_column: end_col,
233                    message,
234                    severity: Severity::Warning,
235                    fix: Some(Fix {
236                        range: _line_index.line_col_to_byte_range_with_length(line_num + 1, start_pos + 1, tab_count),
237                        replacement: " ".repeat(tab_count * self.config.spaces_per_tab.get()),
238                    }),
239                });
240            }
241        }
242
243        Ok(warnings)
244    }
245
246    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
247        let content = ctx.content;
248
249        let mut result = String::new();
250        let lines: Vec<&str> = content.lines().collect();
251
252        // Pre-compute which lines are part of HTML comments
253        let html_comment_lines = Self::find_html_comment_lines(&lines);
254
255        // Pre-compute which lines are inside fenced code blocks
256        // Only skip fenced code blocks - code has its own formatting rules
257        // (e.g., Makefiles require tabs, Go uses tabs by convention)
258        let fenced_code_block_lines = Self::find_fenced_code_block_lines(&lines);
259
260        for (i, line) in lines.iter().enumerate() {
261            if html_comment_lines[i] {
262                // Preserve HTML comments as they are
263                result.push_str(line);
264            } else if fenced_code_block_lines[i] {
265                // Preserve fenced code blocks as-is - code has its own formatting rules
266                result.push_str(line);
267            } else {
268                // Replace tabs with spaces in regular markdown content
269                // (including tab-indented content which might be accidental)
270                result.push_str(&line.replace('\t', &" ".repeat(self.config.spaces_per_tab.get())));
271            }
272
273            // Add newline if not the last line without a newline
274            if i < lines.len() - 1 || content.ends_with('\n') {
275                result.push('\n');
276            }
277        }
278
279        Ok(result)
280    }
281
282    fn as_any(&self) -> &dyn std::any::Any {
283        self
284    }
285
286    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
287        // Skip if content is empty or has no tabs
288        ctx.content.is_empty() || !ctx.has_char('\t')
289    }
290
291    fn category(&self) -> RuleCategory {
292        RuleCategory::Whitespace
293    }
294
295    fn default_config_section(&self) -> Option<(String, toml::Value)> {
296        let default_config = MD010Config::default();
297        let json_value = serde_json::to_value(&default_config).ok()?;
298        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
299
300        if let toml::Value::Table(table) = toml_value {
301            if !table.is_empty() {
302                Some((MD010Config::RULE_NAME.to_string(), toml::Value::Table(table)))
303            } else {
304                None
305            }
306        } else {
307            None
308        }
309    }
310
311    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
312    where
313        Self: Sized,
314    {
315        let rule_config = crate::rule_config_serde::load_rule_config::<MD010Config>(config);
316        Box::new(Self::from_config_struct(rule_config))
317    }
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::rule::Rule;

    /// Lints `content` as standard-flavor Markdown and returns the warnings.
    fn check_content(rule: &MD010NoHardTabs, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    /// Runs the fixer on `content` as standard-flavor Markdown.
    fn fix_content(rule: &MD010NoHardTabs, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_no_tabs() {
        let rule = MD010NoHardTabs::default();
        let result = check_content(&rule, "This is a line\nAnother line\nNo tabs here");
        assert!(result.is_empty());
    }

    #[test]
    fn test_single_tab() {
        let rule = MD010NoHardTabs::default();
        let result = check_content(&rule, "Line with\ttab");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 10);
        assert_eq!(result[0].message, "Found tab for alignment, use spaces instead");
    }

    #[test]
    fn test_leading_tabs() {
        let rule = MD010NoHardTabs::default();
        let result = check_content(&rule, "\tIndented line\n\t\tDouble indented");
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].message, "Found leading tab, use 4 spaces instead");
        assert_eq!(result[1].line, 2);
        assert_eq!(result[1].message, "Found 2 leading tabs, use 8 spaces instead");
    }

    #[test]
    fn test_fix_tabs() {
        let rule = MD010NoHardTabs::default();
        let fixed = fix_content(&rule, "\tIndented\nNormal\tline\nNo tabs");
        assert_eq!(fixed, "    Indented\nNormal    line\nNo tabs");
    }

    #[test]
    fn test_custom_spaces_per_tab() {
        // Use a width that differs from the default (4) so the test fails if
        // the configured value is ignored.
        let rule = MD010NoHardTabs::new(2);
        let fixed = fix_content(&rule, "\tIndented");
        assert_eq!(fixed, "  Indented");
    }

    #[test]
    fn test_code_blocks_always_ignored() {
        let rule = MD010NoHardTabs::default();
        let content = "Normal\tline\n```\nCode\twith\ttab\n```\nAnother\tline";

        // Should only flag tabs outside code blocks - code has its own formatting rules
        let result = check_content(&rule, content);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[1].line, 5);

        let fixed = fix_content(&rule, content);
        assert_eq!(fixed, "Normal    line\n```\nCode\twith\ttab\n```\nAnother    line");
    }

    #[test]
    fn test_code_blocks_never_checked() {
        let rule = MD010NoHardTabs::default();
        // Should never flag tabs in code blocks - code has its own formatting rules
        // (e.g., Makefiles require tabs, Go uses tabs by convention)
        let result = check_content(&rule, "```\nCode\twith\ttab\n```");
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_html_comments_ignored() {
        let rule = MD010NoHardTabs::default();
        // Should not flag tabs in HTML comments
        let result = check_content(&rule, "Normal\tline\n<!-- HTML\twith\ttab -->\nAnother\tline");
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[1].line, 3);
    }

    #[test]
    fn test_multiline_html_comments() {
        let rule = MD010NoHardTabs::default();
        // Should only flag the tab after the comment
        let result = check_content(
            &rule,
            "Before\n<!--\nMultiline\twith\ttabs\ncomment\t-->\nAfter\ttab",
        );
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 5);
    }

    #[test]
    fn test_empty_lines_with_tabs() {
        let rule = MD010NoHardTabs::default();
        let result = check_content(&rule, "Normal line\n\t\t\n\t\nAnother line");
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].message, "Empty line contains 2 tabs");
        assert_eq!(result[1].message, "Empty line contains tab");
    }

    #[test]
    fn test_mixed_tabs_and_spaces() {
        let rule = MD010NoHardTabs::default();
        let result = check_content(&rule, " \tMixed indentation\n\t Mixed again");
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_consecutive_tabs() {
        let rule = MD010NoHardTabs::default();
        // Should group consecutive tabs
        let result = check_content(&rule, "Text\t\t\tthree tabs\tand\tanother");
        assert_eq!(result.len(), 3);
        assert_eq!(result[0].message, "Found 3 tabs for alignment, use spaces instead");
    }

    #[test]
    fn test_find_and_group_tabs() {
        // Test finding and grouping tabs in one pass
        assert_eq!(MD010NoHardTabs::find_and_group_tabs("a\tb\tc"), vec![(1, 2), (3, 4)]);
        assert_eq!(MD010NoHardTabs::find_and_group_tabs("\t\tabc"), vec![(0, 2)]);
        assert!(MD010NoHardTabs::find_and_group_tabs("no tabs").is_empty());

        // Test with consecutive and non-consecutive tabs
        assert_eq!(
            MD010NoHardTabs::find_and_group_tabs("\t\t\ta\t\tb"),
            vec![(0, 3), (4, 6)]
        );
        assert_eq!(
            MD010NoHardTabs::find_and_group_tabs("\ta\tb\tc"),
            vec![(0, 1), (2, 3), (4, 5)]
        );
    }

    #[test]
    fn test_count_leading_tabs() {
        assert_eq!(MD010NoHardTabs::count_leading_tabs("\t\tcode"), 2);
        assert_eq!(MD010NoHardTabs::count_leading_tabs(" \tcode"), 0);
        assert_eq!(MD010NoHardTabs::count_leading_tabs("no tabs"), 0);
        assert_eq!(MD010NoHardTabs::count_leading_tabs("\t"), 1);
    }

    #[test]
    fn test_default_config() {
        let rule = MD010NoHardTabs::default();
        let config = rule.default_config_section();
        assert!(config.is_some());
        let (name, _value) = config.unwrap();
        assert_eq!(name, "MD010");
    }

    #[test]
    fn test_from_config() {
        // Test that custom config values are properly applied
        let custom_spaces = 8;
        let rule = MD010NoHardTabs::new(custom_spaces);
        let fixed = fix_content(&rule, "\tTab");
        assert_eq!(fixed, "        Tab");

        // Code blocks are always ignored: tabs in them are never flagged
        let result = check_content(&rule, "```\n\tTab in code\n```");
        assert!(result.is_empty());
    }

    #[test]
    fn test_performance_large_document() {
        let rule = MD010NoHardTabs::default();
        let mut content = String::new();
        for i in 0..1000 {
            content.push_str(&format!("Line {i}\twith\ttabs\n"));
        }
        let result = check_content(&rule, &content);
        assert_eq!(result.len(), 2000);
    }

    #[test]
    fn test_preserve_content() {
        let rule = MD010NoHardTabs::default();
        let fixed = fix_content(&rule, "**Bold**\ttext\n*Italic*\ttext\n[Link](url)\ttab");
        assert_eq!(fixed, "**Bold**    text\n*Italic*    text\n[Link](url)    tab");
    }

    #[test]
    fn test_edge_cases() {
        let rule = MD010NoHardTabs::default();

        // Tab at end of line
        let result = check_content(&rule, "Text\t");
        assert_eq!(result.len(), 1);

        // Only tabs
        let result = check_content(&rule, "\t\t\t");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].message, "Empty line contains 3 tabs");
    }

    #[test]
    fn test_code_blocks_always_preserved_in_fix() {
        let rule = MD010NoHardTabs::default();

        let content = "Text\twith\ttab\n```makefile\ntarget:\n\tcommand\n\tanother\n```\nMore\ttabs";
        let fixed = fix_content(&rule, content);

        // Tabs in code blocks are preserved - code has its own formatting rules
        // (e.g., Makefiles require tabs, Go uses tabs by convention)
        let expected = "Text    with    tab\n```makefile\ntarget:\n\tcommand\n\tanother\n```\nMore    tabs";
        assert_eq!(fixed, expected);
    }

    #[test]
    fn test_find_html_comment_lines() {
        let lines = vec!["Normal", "<!-- Start", "Middle", "End -->", "After"];
        let result = MD010NoHardTabs::find_html_comment_lines(&lines);
        assert_eq!(result, vec![false, true, true, true, false]);

        let lines = vec!["<!-- Single line comment -->", "Normal"];
        let result = MD010NoHardTabs::find_html_comment_lines(&lines);
        assert_eq!(result, vec![true, false]);
    }
}
576}