rumdl_lib/rules/md060_table_format/
mod.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::range_utils::calculate_line_range;
3use crate::utils::table_utils::TableUtils;
4use unicode_width::UnicodeWidthStr;
5
6mod md060_config;
7use md060_config::MD060Config;
8
9/// Rule MD060: Table Column Alignment
10///
11/// See [docs/md060.md](../../docs/md060.md) for full documentation, configuration, and examples.
12///
13/// This rule enforces consistent column alignment in Markdown tables for improved readability
14/// in source form. When enabled, it ensures table columns are properly aligned with appropriate
15/// padding.
16///
17/// ## Purpose
18///
19/// - **Readability**: Aligned tables are significantly easier to read in source form
20/// - **Maintainability**: Properly formatted tables are easier to edit and review
21/// - **Consistency**: Ensures uniform table formatting throughout documents
22/// - **Developer Experience**: Makes working with tables in plain text more pleasant
23///
24/// ## Configuration Options
25///
26/// The rule supports the following configuration options:
27///
28/// ```yaml
29/// MD060:
30///   enabled: false  # Default: opt-in for conservative adoption
31///   style: "aligned"  # Can be "aligned", "compact", or "none"
32///   max_width: 120  # Optional: auto-compact for wide tables
33/// ```
34///
35/// ### Style Options
36///
37/// - **aligned**: Columns are padded with spaces for visual alignment (default)
38/// - **compact**: No padding, minimal spacing
39/// - **none**: Disable formatting checks
40///
41/// ### Max Width
42///
43/// When `max_width` is set (default: 120), tables wider than this limit will automatically
44/// use compact formatting to prevent excessive line lengths.
45///
46/// ## Examples
47///
48/// ### Aligned Style (Good)
49///
50/// ```markdown
51/// | Name  | Age | City      |
52/// |-------|-----|-----------|
53/// | Alice | 30  | Seattle   |
54/// | Bob   | 25  | Portland  |
55/// ```
56///
57/// ### Unaligned (Bad)
58///
59/// ```markdown
60/// | Name | Age | City |
61/// |---|---|---|
62/// | Alice | 30 | Seattle |
63/// | Bob | 25 | Portland |
64/// ```
65///
66/// ## Unicode Support
67///
68/// This rule properly handles:
69/// - **CJK Characters**: Chinese, Japanese, Korean characters are correctly measured as double-width
70/// - **Basic Emoji**: Most emoji are handled correctly
71/// - **Inline Code**: Pipes in inline code blocks are properly masked
72///
73/// ## Known Limitations
74///
75/// **Complex Emoji Sequences**: Tables containing Zero-Width Joiner (ZWJ) emoji sequences
76/// (e.g., πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦, πŸ‘©β€πŸ’») are automatically skipped. These complex emoji have inconsistent
77/// display widths across different terminals and fonts, making accurate alignment impossible.
78/// The rule will preserve these tables as-is rather than risk corrupting them.
79///
80/// This is an honest limitation of terminal display technology, similar to what other tools
81/// like markdownlint experience.
82///
83/// ## Fix Behavior
84///
85/// When applying automatic fixes, this rule:
86/// - Calculates proper display width for each column using Unicode width measurements
87/// - Pads cells with trailing spaces to align columns
88/// - Preserves cell content exactly (only spacing is modified)
89/// - Respects alignment indicators in delimiter rows (`:---`, `:---:`, `---:`)
90/// - Automatically switches to compact mode for tables exceeding max_width
91/// - Skips tables with ZWJ emoji to prevent corruption
92#[derive(Debug, Default, Clone)]
93pub struct MD060TableFormat {
94    config: MD060Config,
95}
96
97impl MD060TableFormat {
98    pub fn new(enabled: bool, style: String) -> Self {
99        Self {
100            config: MD060Config { enabled, style },
101        }
102    }
103
104    pub fn from_config_struct(config: MD060Config) -> Self {
105        Self { config }
106    }
107
108    fn contains_zwj(text: &str) -> bool {
109        text.contains('\u{200D}')
110    }
111
112    fn calculate_cell_display_width(cell_content: &str) -> usize {
113        let masked = TableUtils::mask_pipes_in_inline_code(cell_content);
114        masked.trim().width()
115    }
116
117    fn parse_table_row(line: &str) -> Vec<String> {
118        let trimmed = line.trim();
119        let masked = TableUtils::mask_pipes_in_inline_code(trimmed);
120
121        let has_leading = masked.starts_with('|');
122        let has_trailing = masked.ends_with('|');
123
124        let mut masked_content = masked.as_str();
125        let mut orig_content = trimmed;
126
127        if has_leading {
128            masked_content = &masked_content[1..];
129            orig_content = &orig_content[1..];
130        }
131        if has_trailing && !masked_content.is_empty() {
132            masked_content = &masked_content[..masked_content.len() - 1];
133            orig_content = &orig_content[..orig_content.len() - 1];
134        }
135
136        let masked_parts: Vec<&str> = masked_content.split('|').collect();
137        let mut cells = Vec::new();
138        let mut pos = 0;
139
140        for masked_cell in masked_parts {
141            let cell_len = masked_cell.len();
142            let orig_cell = if pos + cell_len <= orig_content.len() {
143                &orig_content[pos..pos + cell_len]
144            } else {
145                masked_cell
146            };
147            cells.push(orig_cell.to_string());
148            pos += cell_len + 1;
149        }
150
151        cells
152    }
153
154    fn is_delimiter_row(row: &[String]) -> bool {
155        if row.is_empty() {
156            return false;
157        }
158        row.iter()
159            .all(|cell| cell.trim().chars().all(|c| c == '-' || c == ':' || c.is_whitespace()))
160    }
161
162    fn calculate_column_widths(table_lines: &[&str]) -> Vec<usize> {
163        let mut column_widths = Vec::new();
164
165        for line in table_lines {
166            let cells = Self::parse_table_row(line);
167            for (i, cell) in cells.iter().enumerate() {
168                let width = Self::calculate_cell_display_width(cell);
169                if i >= column_widths.len() {
170                    column_widths.push(width);
171                } else {
172                    column_widths[i] = column_widths[i].max(width);
173                }
174            }
175        }
176
177        // GFM requires delimiter rows to have at least 3 dashes per column.
178        // To ensure visual alignment, all columns must be at least width 3.
179        column_widths.iter().map(|&w| w.max(3)).collect()
180    }
181
182    fn format_table_row(cells: &[String], column_widths: &[usize], is_delimiter: bool) -> String {
183        let formatted_cells: Vec<String> = cells
184            .iter()
185            .enumerate()
186            .map(|(i, cell)| {
187                let target_width = column_widths.get(i).copied().unwrap_or(0);
188                if is_delimiter {
189                    let trimmed = cell.trim();
190                    let has_left_colon = trimmed.starts_with(':');
191                    let has_right_colon = trimmed.ends_with(':');
192
193                    // Delimiter rows use the same cell format as content rows: | content |
194                    // The "content" is dashes, possibly with colons for alignment
195                    let dash_count = if has_left_colon && has_right_colon {
196                        target_width.saturating_sub(2)
197                    } else if has_left_colon || has_right_colon {
198                        target_width.saturating_sub(1)
199                    } else {
200                        target_width
201                    };
202
203                    let dashes = "-".repeat(dash_count.max(3)); // Minimum 3 dashes
204                    let delimiter_content = if has_left_colon && has_right_colon {
205                        format!(":{dashes}:")
206                    } else if has_left_colon {
207                        format!(":{dashes}")
208                    } else if has_right_colon {
209                        format!("{dashes}:")
210                    } else {
211                        dashes
212                    };
213
214                    // Add spaces around delimiter content, just like content cells
215                    format!(" {delimiter_content} ")
216                } else {
217                    let trimmed = cell.trim();
218                    let current_width = Self::calculate_cell_display_width(cell);
219                    let padding = target_width.saturating_sub(current_width);
220                    format!(" {trimmed}{} ", " ".repeat(padding))
221                }
222            })
223            .collect();
224
225        format!("|{}|", formatted_cells.join("|"))
226    }
227
228    fn format_table_compact(cells: &[String]) -> String {
229        let formatted_cells: Vec<String> = cells.iter().map(|cell| format!(" {} ", cell.trim())).collect();
230        format!("|{}|", formatted_cells.join("|"))
231    }
232
233    fn format_table_tight(cells: &[String]) -> String {
234        let formatted_cells: Vec<String> = cells.iter().map(|cell| cell.trim().to_string()).collect();
235        format!("|{}|", formatted_cells.join("|"))
236    }
237
238    fn detect_table_style(table_lines: &[&str]) -> Option<String> {
239        if table_lines.is_empty() {
240            return None;
241        }
242
243        let first_line = table_lines[0];
244        let cells = Self::parse_table_row(first_line);
245
246        if cells.is_empty() {
247            return None;
248        }
249
250        let has_no_padding = cells.iter().all(|cell| !cell.starts_with(' ') && !cell.ends_with(' '));
251
252        let has_single_space = cells.iter().all(|cell| {
253            let trimmed = cell.trim();
254            cell == &format!(" {trimmed} ")
255        });
256
257        if has_no_padding {
258            Some("tight".to_string())
259        } else if has_single_space {
260            Some("compact".to_string())
261        } else {
262            Some("aligned".to_string())
263        }
264    }
265
266    fn fix_table_block(&self, lines: &[&str], table_block: &crate::utils::table_utils::TableBlock) -> Vec<String> {
267        let mut result = Vec::new();
268
269        let table_lines: Vec<&str> = std::iter::once(lines[table_block.header_line])
270            .chain(std::iter::once(lines[table_block.delimiter_line]))
271            .chain(table_block.content_lines.iter().map(|&idx| lines[idx]))
272            .collect();
273
274        if table_lines.iter().any(|line| Self::contains_zwj(line)) {
275            return table_lines.iter().map(|s| s.to_string()).collect();
276        }
277
278        let style = self.config.style.as_str();
279
280        match style {
281            "any" => {
282                let detected_style = Self::detect_table_style(&table_lines);
283                if detected_style.is_none() {
284                    return table_lines.iter().map(|s| s.to_string()).collect();
285                }
286
287                let target_style = detected_style.unwrap();
288                for line in &table_lines {
289                    let cells = Self::parse_table_row(line);
290                    match target_style.as_str() {
291                        "tight" => result.push(Self::format_table_tight(&cells)),
292                        "compact" => result.push(Self::format_table_compact(&cells)),
293                        _ => {
294                            let column_widths = Self::calculate_column_widths(&table_lines);
295                            let is_delimiter = Self::is_delimiter_row(&cells);
296                            result.push(Self::format_table_row(&cells, &column_widths, is_delimiter));
297                        }
298                    }
299                }
300                return result;
301            }
302            "compact" => {
303                for line in table_lines {
304                    let cells = Self::parse_table_row(line);
305                    result.push(Self::format_table_compact(&cells));
306                }
307                return result;
308            }
309            "tight" => {
310                for line in table_lines {
311                    let cells = Self::parse_table_row(line);
312                    result.push(Self::format_table_tight(&cells));
313                }
314                return result;
315            }
316            "aligned" => {
317                let column_widths = Self::calculate_column_widths(&table_lines);
318
319                for line in table_lines {
320                    let cells = Self::parse_table_row(line);
321                    let is_delimiter = Self::is_delimiter_row(&cells);
322                    result.push(Self::format_table_row(&cells, &column_widths, is_delimiter));
323                }
324            }
325            _ => {
326                return table_lines.iter().map(|s| s.to_string()).collect();
327            }
328        }
329
330        result
331    }
332}
333
334impl Rule for MD060TableFormat {
335    fn name(&self) -> &'static str {
336        "MD060"
337    }
338
339    fn description(&self) -> &'static str {
340        "Table columns should be consistently aligned"
341    }
342
343    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
344        !self.config.enabled || !ctx.likely_has_tables()
345    }
346
347    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
348        if !self.config.enabled {
349            return Ok(Vec::new());
350        }
351
352        let content = ctx.content;
353        let line_index = &ctx.line_index;
354        let mut warnings = Vec::new();
355
356        let lines: Vec<&str> = content.lines().collect();
357        let table_blocks = &ctx.table_blocks;
358
359        for table_block in table_blocks {
360            let fixed_lines = self.fix_table_block(&lines, table_block);
361
362            let table_line_indices: Vec<usize> = std::iter::once(table_block.header_line)
363                .chain(std::iter::once(table_block.delimiter_line))
364                .chain(table_block.content_lines.iter().copied())
365                .collect();
366
367            for (i, &line_idx) in table_line_indices.iter().enumerate() {
368                let original = lines[line_idx];
369                let fixed = &fixed_lines[i];
370
371                if original != fixed {
372                    let (start_line, start_col, end_line, end_col) = calculate_line_range(line_idx + 1, original);
373
374                    warnings.push(LintWarning {
375                        rule_name: Some(self.name().to_string()),
376                        severity: Severity::Warning,
377                        message: "Table columns should be aligned".to_string(),
378                        line: start_line,
379                        column: start_col,
380                        end_line,
381                        end_column: end_col,
382                        fix: Some(crate::rule::Fix {
383                            range: line_index.whole_line_range(line_idx + 1),
384                            replacement: if line_idx < lines.len() - 1 {
385                                format!("{fixed}\n")
386                            } else {
387                                fixed.clone()
388                            },
389                        }),
390                    });
391                }
392            }
393        }
394
395        Ok(warnings)
396    }
397
398    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
399        if !self.config.enabled {
400            return Ok(ctx.content.to_string());
401        }
402
403        let content = ctx.content;
404        let lines: Vec<&str> = content.lines().collect();
405        let table_blocks = &ctx.table_blocks;
406
407        let mut result_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
408
409        for table_block in table_blocks {
410            let fixed_lines = self.fix_table_block(&lines, table_block);
411
412            let table_line_indices: Vec<usize> = std::iter::once(table_block.header_line)
413                .chain(std::iter::once(table_block.delimiter_line))
414                .chain(table_block.content_lines.iter().copied())
415                .collect();
416
417            for (i, &line_idx) in table_line_indices.iter().enumerate() {
418                result_lines[line_idx] = fixed_lines[i].clone();
419            }
420        }
421
422        let mut fixed = result_lines.join("\n");
423        if content.ends_with('\n') && !fixed.ends_with('\n') {
424            fixed.push('\n');
425        }
426        Ok(fixed)
427    }
428
429    fn as_any(&self) -> &dyn std::any::Any {
430        self
431    }
432
433    fn default_config_section(&self) -> Option<(String, toml::Value)> {
434        let json_value = serde_json::to_value(&self.config).ok()?;
435        Some((
436            self.name().to_string(),
437            crate::rule_config_serde::json_to_toml_value(&json_value)?,
438        ))
439    }
440
441    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
442    where
443        Self: Sized,
444    {
445        let rule_config = crate::rule_config_serde::load_rule_config::<MD060Config>(config);
446        Box::new(Self::from_config_struct(rule_config))
447    }
448}
449
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// The default-constructed rule is disabled and must leave content untouched.
    #[test]
    fn test_md060_disabled_by_default() {
        let rule = MD060TableFormat::default();
        let content = "| Name | Age |\n|---|---|\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, content);
    }

    #[test]
    fn test_md060_align_simple_ascii_table() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Name | Age |\n|---|---|\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        let expected = "| Name  | Age |\n| ----- | --- |\n| Alice | 30  |";
        assert_eq!(fixed, expected);

        // Verify all rows have equal length in aligned mode
        let lines: Vec<&str> = fixed.lines().collect();
        assert_eq!(lines[0].len(), lines[1].len());
        assert_eq!(lines[1].len(), lines[2].len());
    }

    /// Double-width CJK text must occupy the same display width as its ASCII neighbours.
    #[test]
    fn test_md060_cjk_characters_aligned_correctly() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Name | Age |\n|---|---|\n| 中文 | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();

        let lines: Vec<&str> = fixed.lines().collect();
        let cells_line1 = MD060TableFormat::parse_table_row(lines[0]);
        let cells_line3 = MD060TableFormat::parse_table_row(lines[2]);

        let width1 = MD060TableFormat::calculate_cell_display_width(&cells_line1[0]);
        let width3 = MD060TableFormat::calculate_cell_display_width(&cells_line3[0]);

        assert_eq!(width1, width3);
    }

    #[test]
    fn test_md060_basic_emoji() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Status | Name |\n|---|---|\n| ✅ | Test |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("Status"));
    }

    /// Tables containing a ZWJ emoji sequence are returned unchanged.
    #[test]
    fn test_md060_zwj_emoji_skipped() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        // The joiners are written as escapes so the test cannot silently lose them.
        let content = "| Emoji | Name |\n|---|---|\n| 👨\u{200D}👩\u{200D}👧\u{200D}👦 | Family |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, content);
    }

    #[test]
    fn test_md060_inline_code_with_pipes() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Pattern | Regex |\n|---|---|\n| Time | `[0-9]|[0-9]` |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("`[0-9]|[0-9]`"));
    }

    #[test]
    fn test_md060_compact_style() {
        let rule = MD060TableFormat::new(true, "compact".to_string());

        let content = "| Name | Age |\n|---|---|\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        let expected = "| Name | Age |\n| --- | --- |\n| Alice | 30 |";
        assert_eq!(fixed, expected);
    }

    #[test]
    fn test_md060_tight_style() {
        let rule = MD060TableFormat::new(true, "tight".to_string());

        let content = "| Name | Age |\n|---|---|\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        let expected = "|Name|Age|\n|---|---|\n|Alice|30|";
        assert_eq!(fixed, expected);
    }

    #[test]
    fn test_md060_any_style_consistency() {
        let rule = MD060TableFormat::new(true, "any".to_string());

        // Table is already compact, should stay compact
        let content = "| Name | Age |\n| --- | --- |\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, content);

        // Table is aligned, should stay aligned
        let content_aligned = "| Name  | Age |\n| ----- | --- |\n| Alice | 30  |";
        let ctx_aligned = LintContext::new(content_aligned, crate::config::MarkdownFlavor::Standard);

        let fixed_aligned = rule.fix(&ctx_aligned).unwrap();
        assert_eq!(fixed_aligned, content_aligned);
    }

    #[test]
    fn test_md060_empty_cells() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| A | B |\n|---|---|\n|  | X |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("|"));
    }

    #[test]
    fn test_md060_mixed_content() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Name | Age | City |\n|---|---|---|\n| 中文 | 30 | NYC |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("中文"));
        assert!(fixed.contains("NYC"));
    }

    #[test]
    fn test_md060_preserve_alignment_indicators() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Left | Center | Right |\n|:---|:---:|---:|\n| A | B | C |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();

        assert!(fixed.contains(":---"), "Should contain left alignment");
        assert!(fixed.contains(":----:"), "Should contain center alignment");
        assert!(fixed.contains("----:"), "Should contain right alignment");
    }

    #[test]
    fn test_md060_minimum_column_width() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        // Very short column content: the formatter enforces a minimum column width of 3,
        // matching the three dashes it writes into each delimiter cell.
        let content = "| ID | Name |\n|-|-|\n| 1 | A |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();

        let lines: Vec<&str> = fixed.lines().collect();
        assert_eq!(lines[0].len(), lines[1].len());
        assert_eq!(lines[1].len(), lines[2].len());

        // Verify minimum width is enforced
        assert!(fixed.contains("ID "), "Short content should be padded");
        assert!(fixed.contains("---"), "Delimiter should have at least 3 dashes");
    }
}