rumdl_lib/rules/md060_table_format/
mod.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
2use crate::utils::range_utils::calculate_line_range;
3use crate::utils::table_utils::TableUtils;
4use unicode_width::UnicodeWidthStr;
5
6mod md060_config;
7use md060_config::MD060Config;
8
/// Rule MD060: Table Column Alignment
///
/// See [docs/md060.md](../../docs/md060.md) for full documentation, configuration, and examples.
///
/// This rule enforces consistent column formatting in Markdown tables for improved readability
/// in source form. When enabled, it rewrites table rows so that every row follows the
/// configured style.
///
/// ## Purpose
///
/// - **Readability**: Aligned tables are significantly easier to read in source form
/// - **Maintainability**: Properly formatted tables are easier to edit and review
/// - **Consistency**: Ensures uniform table formatting throughout documents
/// - **Developer Experience**: Makes working with tables in plain text more pleasant
///
/// ## Configuration Options
///
/// The rule is driven by two settings, `enabled` and `style`:
///
/// ```yaml
/// MD060:
///   enabled: false  # Opt-in: a default-constructed rule reports nothing
///   style: "aligned"  # Can be "aligned", "compact", "tight", or "any"
/// ```
///
/// ### Style Options
///
/// - **aligned**: Cells are padded with trailing spaces so that columns line up
/// - **compact**: Exactly one space on each side of the cell content, no column alignment
/// - **tight**: No spaces between the pipes and the cell content
/// - **any**: The style already used by the table is detected and applied to every row
///
/// Any other value (for example `"none"`) leaves tables untouched.
///
/// ### Max Width
///
/// NOTE(review): earlier documentation described a `max_width` option that switches wide
/// tables to compact formatting. Nothing in this module reads such an option; confirm
/// against `md060_config` before documenting it for users.
///
/// ## Examples
///
/// ### Aligned Style (Good)
///
/// ```markdown
/// | Name  | Age | City     |
/// | ----- | --- | -------- |
/// | Alice | 30  | Seattle  |
/// | Bob   | 25  | Portland |
/// ```
///
/// ### Unaligned (Bad)
///
/// ```markdown
/// | Name | Age | City |
/// |---|---|---|
/// | Alice | 30 | Seattle |
/// | Bob | 25 | Portland |
/// ```
///
/// ## Unicode Support
///
/// Cell widths are measured with `unicode_width`, so:
/// - **CJK Characters**: Chinese, Japanese, Korean characters are measured as double-width
/// - **Basic Emoji**: Most emoji are handled correctly
/// - **Inline Code**: Pipes inside inline code spans are masked before a row is split into cells
///
/// ## Known Limitations
///
/// **Complex Emoji Sequences**: Tables containing Zero-Width Joiner (ZWJ, U+200D) emoji sequences
/// (e.g., 👨‍👩‍👧‍👦, 👩‍💻) are automatically skipped. These complex emoji have inconsistent
/// display widths across different terminals and fonts, making accurate alignment impossible.
/// The rule will preserve these tables as-is rather than risk corrupting them.
///
/// This is an honest limitation of terminal display technology, similar to what other tools
/// like markdownlint experience.
///
/// ## Fix Behavior
///
/// When applying automatic fixes in the aligned style, this rule:
/// - Calculates the display width of each column using Unicode width measurements
/// - Uses a minimum column width of 3 so delimiter cells always have at least three dashes
/// - Pads cells with trailing spaces to align columns
/// - Preserves cell content exactly (only spacing is modified)
/// - Respects alignment indicators in delimiter rows (`:---`, `:---:`, `---:`)
/// - Skips tables with ZWJ emoji to prevent corruption
#[derive(Debug, Default, Clone)]
pub struct MD060TableFormat {
    config: MD060Config,
}
96
97impl MD060TableFormat {
98    pub fn new(enabled: bool, style: String) -> Self {
99        Self {
100            config: MD060Config { enabled, style },
101        }
102    }
103
104    pub fn from_config_struct(config: MD060Config) -> Self {
105        Self { config }
106    }
107
108    fn contains_zwj(text: &str) -> bool {
109        text.contains('\u{200D}')
110    }
111
112    fn calculate_cell_display_width(cell_content: &str) -> usize {
113        let masked = TableUtils::mask_pipes_in_inline_code(cell_content);
114        masked.trim().width()
115    }
116
117    fn parse_table_row(line: &str) -> Vec<String> {
118        let trimmed = line.trim();
119        let masked = TableUtils::mask_pipes_in_inline_code(trimmed);
120
121        let has_leading = masked.starts_with('|');
122        let has_trailing = masked.ends_with('|');
123
124        let mut masked_content = masked.as_str();
125        let mut orig_content = trimmed;
126
127        if has_leading {
128            masked_content = &masked_content[1..];
129            orig_content = &orig_content[1..];
130        }
131        if has_trailing && !masked_content.is_empty() {
132            masked_content = &masked_content[..masked_content.len() - 1];
133            orig_content = &orig_content[..orig_content.len() - 1];
134        }
135
136        let masked_parts: Vec<&str> = masked_content.split('|').collect();
137        let mut cells = Vec::new();
138        let mut pos = 0;
139
140        for masked_cell in masked_parts {
141            let cell_len = masked_cell.len();
142            let orig_cell = if pos + cell_len <= orig_content.len() {
143                &orig_content[pos..pos + cell_len]
144            } else {
145                masked_cell
146            };
147            cells.push(orig_cell.to_string());
148            pos += cell_len + 1;
149        }
150
151        cells
152    }
153
154    fn is_delimiter_row(row: &[String]) -> bool {
155        if row.is_empty() {
156            return false;
157        }
158        row.iter().all(|cell| {
159            let trimmed = cell.trim();
160            // A delimiter cell must contain at least one dash
161            // Empty cells are not delimiter cells
162            !trimmed.is_empty()
163                && trimmed.contains('-')
164                && trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace())
165        })
166    }
167
168    fn calculate_column_widths(table_lines: &[&str]) -> Vec<usize> {
169        let mut column_widths = Vec::new();
170        let mut delimiter_cells: Option<Vec<String>> = None;
171
172        for line in table_lines {
173            let cells = Self::parse_table_row(line);
174
175            // Save delimiter row for later processing, but don't use it for width calculation
176            if Self::is_delimiter_row(&cells) {
177                delimiter_cells = Some(cells);
178                continue;
179            }
180
181            for (i, cell) in cells.iter().enumerate() {
182                let width = Self::calculate_cell_display_width(cell);
183                if i >= column_widths.len() {
184                    column_widths.push(width);
185                } else {
186                    column_widths[i] = column_widths[i].max(width);
187                }
188            }
189        }
190
191        // GFM requires delimiter rows to have at least 3 dashes per column.
192        // To ensure visual alignment, all columns must be at least width 3.
193        let mut final_widths: Vec<usize> = column_widths.iter().map(|&w| w.max(3)).collect();
194
195        // Adjust column widths to accommodate alignment indicators (colons) in delimiter row
196        // This ensures the delimiter row has the same length as content rows
197        if let Some(delimiter_cells) = delimiter_cells {
198            for (i, cell) in delimiter_cells.iter().enumerate() {
199                if i < final_widths.len() {
200                    let trimmed = cell.trim();
201                    let has_left_colon = trimmed.starts_with(':');
202                    let has_right_colon = trimmed.ends_with(':');
203                    let colon_count = (has_left_colon as usize) + (has_right_colon as usize);
204
205                    // Minimum width needed: 3 dashes + colons
206                    let min_width_for_delimiter = 3 + colon_count;
207                    final_widths[i] = final_widths[i].max(min_width_for_delimiter);
208                }
209            }
210        }
211
212        final_widths
213    }
214
215    fn format_table_row(cells: &[String], column_widths: &[usize], is_delimiter: bool) -> String {
216        let formatted_cells: Vec<String> = cells
217            .iter()
218            .enumerate()
219            .map(|(i, cell)| {
220                let target_width = column_widths.get(i).copied().unwrap_or(0);
221                if is_delimiter {
222                    let trimmed = cell.trim();
223                    let has_left_colon = trimmed.starts_with(':');
224                    let has_right_colon = trimmed.ends_with(':');
225
226                    // Delimiter rows use the same cell format as content rows: | content |
227                    // The "content" is dashes, possibly with colons for alignment
228                    let dash_count = if has_left_colon && has_right_colon {
229                        target_width.saturating_sub(2)
230                    } else if has_left_colon || has_right_colon {
231                        target_width.saturating_sub(1)
232                    } else {
233                        target_width
234                    };
235
236                    let dashes = "-".repeat(dash_count.max(3)); // Minimum 3 dashes
237                    let delimiter_content = if has_left_colon && has_right_colon {
238                        format!(":{dashes}:")
239                    } else if has_left_colon {
240                        format!(":{dashes}")
241                    } else if has_right_colon {
242                        format!("{dashes}:")
243                    } else {
244                        dashes
245                    };
246
247                    // Add spaces around delimiter content, just like content cells
248                    format!(" {delimiter_content} ")
249                } else {
250                    let trimmed = cell.trim();
251                    let current_width = Self::calculate_cell_display_width(cell);
252                    let padding = target_width.saturating_sub(current_width);
253                    format!(" {trimmed}{} ", " ".repeat(padding))
254                }
255            })
256            .collect();
257
258        format!("|{}|", formatted_cells.join("|"))
259    }
260
261    fn format_table_compact(cells: &[String]) -> String {
262        let formatted_cells: Vec<String> = cells.iter().map(|cell| format!(" {} ", cell.trim())).collect();
263        format!("|{}|", formatted_cells.join("|"))
264    }
265
266    fn format_table_tight(cells: &[String]) -> String {
267        let formatted_cells: Vec<String> = cells.iter().map(|cell| cell.trim().to_string()).collect();
268        format!("|{}|", formatted_cells.join("|"))
269    }
270
271    fn detect_table_style(table_lines: &[&str]) -> Option<String> {
272        if table_lines.is_empty() {
273            return None;
274        }
275
276        let first_line = table_lines[0];
277        let cells = Self::parse_table_row(first_line);
278
279        if cells.is_empty() {
280            return None;
281        }
282
283        let has_no_padding = cells.iter().all(|cell| !cell.starts_with(' ') && !cell.ends_with(' '));
284
285        let has_single_space = cells.iter().all(|cell| {
286            let trimmed = cell.trim();
287            cell == &format!(" {trimmed} ")
288        });
289
290        if has_no_padding {
291            Some("tight".to_string())
292        } else if has_single_space {
293            Some("compact".to_string())
294        } else {
295            Some("aligned".to_string())
296        }
297    }
298
299    fn fix_table_block(&self, lines: &[&str], table_block: &crate::utils::table_utils::TableBlock) -> Vec<String> {
300        let mut result = Vec::new();
301
302        let table_lines: Vec<&str> = std::iter::once(lines[table_block.header_line])
303            .chain(std::iter::once(lines[table_block.delimiter_line]))
304            .chain(table_block.content_lines.iter().map(|&idx| lines[idx]))
305            .collect();
306
307        if table_lines.iter().any(|line| Self::contains_zwj(line)) {
308            return table_lines.iter().map(|s| s.to_string()).collect();
309        }
310
311        let style = self.config.style.as_str();
312
313        match style {
314            "any" => {
315                let detected_style = Self::detect_table_style(&table_lines);
316                if detected_style.is_none() {
317                    return table_lines.iter().map(|s| s.to_string()).collect();
318                }
319
320                let target_style = detected_style.unwrap();
321                for line in &table_lines {
322                    let cells = Self::parse_table_row(line);
323                    match target_style.as_str() {
324                        "tight" => result.push(Self::format_table_tight(&cells)),
325                        "compact" => result.push(Self::format_table_compact(&cells)),
326                        _ => {
327                            let column_widths = Self::calculate_column_widths(&table_lines);
328                            let is_delimiter = Self::is_delimiter_row(&cells);
329                            result.push(Self::format_table_row(&cells, &column_widths, is_delimiter));
330                        }
331                    }
332                }
333                return result;
334            }
335            "compact" => {
336                for line in table_lines {
337                    let cells = Self::parse_table_row(line);
338                    result.push(Self::format_table_compact(&cells));
339                }
340                return result;
341            }
342            "tight" => {
343                for line in table_lines {
344                    let cells = Self::parse_table_row(line);
345                    result.push(Self::format_table_tight(&cells));
346                }
347                return result;
348            }
349            "aligned" => {
350                let column_widths = Self::calculate_column_widths(&table_lines);
351
352                for line in table_lines {
353                    let cells = Self::parse_table_row(line);
354                    let is_delimiter = Self::is_delimiter_row(&cells);
355                    result.push(Self::format_table_row(&cells, &column_widths, is_delimiter));
356                }
357            }
358            _ => {
359                return table_lines.iter().map(|s| s.to_string()).collect();
360            }
361        }
362
363        result
364    }
365}
366
367impl Rule for MD060TableFormat {
368    fn name(&self) -> &'static str {
369        "MD060"
370    }
371
372    fn description(&self) -> &'static str {
373        "Table columns should be consistently aligned"
374    }
375
376    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
377        !self.config.enabled || !ctx.likely_has_tables()
378    }
379
380    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
381        if !self.config.enabled {
382            return Ok(Vec::new());
383        }
384
385        let content = ctx.content;
386        let line_index = &ctx.line_index;
387        let mut warnings = Vec::new();
388
389        let lines: Vec<&str> = content.lines().collect();
390        let table_blocks = &ctx.table_blocks;
391
392        for table_block in table_blocks {
393            let fixed_lines = self.fix_table_block(&lines, table_block);
394
395            let table_line_indices: Vec<usize> = std::iter::once(table_block.header_line)
396                .chain(std::iter::once(table_block.delimiter_line))
397                .chain(table_block.content_lines.iter().copied())
398                .collect();
399
400            for (i, &line_idx) in table_line_indices.iter().enumerate() {
401                let original = lines[line_idx];
402                let fixed = &fixed_lines[i];
403
404                if original != fixed {
405                    let (start_line, start_col, end_line, end_col) = calculate_line_range(line_idx + 1, original);
406
407                    warnings.push(LintWarning {
408                        rule_name: Some(self.name().to_string()),
409                        severity: Severity::Warning,
410                        message: "Table columns should be aligned".to_string(),
411                        line: start_line,
412                        column: start_col,
413                        end_line,
414                        end_column: end_col,
415                        fix: Some(crate::rule::Fix {
416                            range: line_index.whole_line_range(line_idx + 1),
417                            replacement: if line_idx < lines.len() - 1 {
418                                format!("{fixed}\n")
419                            } else {
420                                fixed.clone()
421                            },
422                        }),
423                    });
424                }
425            }
426        }
427
428        Ok(warnings)
429    }
430
431    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
432        if !self.config.enabled {
433            return Ok(ctx.content.to_string());
434        }
435
436        let content = ctx.content;
437        let lines: Vec<&str> = content.lines().collect();
438        let table_blocks = &ctx.table_blocks;
439
440        let mut result_lines: Vec<String> = lines.iter().map(|&s| s.to_string()).collect();
441
442        for table_block in table_blocks {
443            let fixed_lines = self.fix_table_block(&lines, table_block);
444
445            let table_line_indices: Vec<usize> = std::iter::once(table_block.header_line)
446                .chain(std::iter::once(table_block.delimiter_line))
447                .chain(table_block.content_lines.iter().copied())
448                .collect();
449
450            for (i, &line_idx) in table_line_indices.iter().enumerate() {
451                result_lines[line_idx] = fixed_lines[i].clone();
452            }
453        }
454
455        let mut fixed = result_lines.join("\n");
456        if content.ends_with('\n') && !fixed.ends_with('\n') {
457            fixed.push('\n');
458        }
459        Ok(fixed)
460    }
461
462    fn as_any(&self) -> &dyn std::any::Any {
463        self
464    }
465
466    fn default_config_section(&self) -> Option<(String, toml::Value)> {
467        let json_value = serde_json::to_value(&self.config).ok()?;
468        Some((
469            self.name().to_string(),
470            crate::rule_config_serde::json_to_toml_value(&json_value)?,
471        ))
472    }
473
474    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
475    where
476        Self: Sized,
477    {
478        let rule_config = crate::rule_config_serde::load_rule_config::<MD060Config>(config);
479        Box::new(Self::from_config_struct(rule_config))
480    }
481}
482
#[cfg(test)]
mod tests {
    //! Unit tests for MD060. Non-ASCII fixtures are real CJK/emoji text; the
    //! ZWJ sequence is written with `\u{…}` escapes so the joiners stay visible
    //! and survive re-encoding of this file.

    use super::*;
    use crate::lint_context::LintContext;

    /// A default-constructed rule is disabled and must not touch the content.
    #[test]
    fn test_md060_disabled_by_default() {
        let rule = MD060TableFormat::default();
        let content = "| Name | Age |\n|---|---|\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let warnings = rule.check(&ctx).unwrap();
        assert_eq!(warnings.len(), 0);

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, content);
    }

    /// Aligned style pads every column to its widest cell.
    #[test]
    fn test_md060_align_simple_ascii_table() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Name | Age |\n|---|---|\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        let expected = "| Name  | Age |\n| ----- | --- |\n| Alice | 30  |";
        assert_eq!(fixed, expected);

        // Verify all rows have equal length in aligned mode
        let lines: Vec<&str> = fixed.lines().collect();
        assert_eq!(lines[0].len(), lines[1].len());
        assert_eq!(lines[1].len(), lines[2].len());
    }

    /// The first column of the header and of the CJK row must report the same
    /// display width after formatting.
    #[test]
    fn test_md060_cjk_characters_aligned_correctly() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Name | Age |\n|---|---|\n| 中文 | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();

        let lines: Vec<&str> = fixed.lines().collect();
        let cells_line1 = MD060TableFormat::parse_table_row(lines[0]);
        let cells_line3 = MD060TableFormat::parse_table_row(lines[2]);

        let width1 = MD060TableFormat::calculate_cell_display_width(&cells_line1[0]);
        let width3 = MD060TableFormat::calculate_cell_display_width(&cells_line3[0]);

        assert_eq!(width1, width3);
    }

    /// A single (non-ZWJ) emoji does not prevent the table from being formatted.
    #[test]
    fn test_md060_basic_emoji() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Status | Name |\n|---|---|\n| ✅ | Test |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("Status"));
    }

    /// Tables containing a ZWJ emoji sequence are left exactly as written.
    #[test]
    fn test_md060_zwj_emoji_skipped() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        // Family emoji: man + ZWJ + woman + ZWJ + girl + ZWJ + boy.
        let content =
            "| Emoji | Name |\n|---|---|\n| \u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466} | Family |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, content);
    }

    /// A pipe inside inline code is cell content, not a column separator.
    #[test]
    fn test_md060_inline_code_with_pipes() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Pattern | Regex |\n|---|---|\n| Time | `[0-9]|[0-9]` |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("`[0-9]|[0-9]`"));
    }

    /// Compact style: one space on each side of every cell, no alignment.
    #[test]
    fn test_md060_compact_style() {
        let rule = MD060TableFormat::new(true, "compact".to_string());

        let content = "| Name | Age |\n|---|---|\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        let expected = "| Name | Age |\n| --- | --- |\n| Alice | 30 |";
        assert_eq!(fixed, expected);
    }

    /// Tight style: no spaces around cell content.
    #[test]
    fn test_md060_tight_style() {
        let rule = MD060TableFormat::new(true, "tight".to_string());

        let content = "| Name | Age |\n|---|---|\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        let expected = "|Name|Age|\n|---|---|\n|Alice|30|";
        assert_eq!(fixed, expected);
    }

    /// Style "any" keeps whichever consistent style the table already uses.
    #[test]
    fn test_md060_any_style_consistency() {
        let rule = MD060TableFormat::new(true, "any".to_string());

        // Table is already compact, should stay compact
        let content = "| Name | Age |\n| --- | --- |\n| Alice | 30 |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, content);

        // Table is aligned, should stay aligned
        let content_aligned = "| Name  | Age |\n| ----- | --- |\n| Alice | 30  |";
        let ctx_aligned = LintContext::new(content_aligned, crate::config::MarkdownFlavor::Standard);

        let fixed_aligned = rule.fix(&ctx_aligned).unwrap();
        assert_eq!(fixed_aligned, content_aligned);
    }

    /// Empty cells must not break formatting.
    #[test]
    fn test_md060_empty_cells() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| A | B |\n|---|---|\n|  | X |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("|"));
    }

    /// Cell content survives formatting when CJK and ASCII cells are mixed.
    #[test]
    fn test_md060_mixed_content() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Name | Age | City |\n|---|---|---|\n| 中文 | 30 | NYC |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("中文"));
        assert!(fixed.contains("NYC"));
    }

    /// Alignment colons in the delimiter row are kept when it is rebuilt.
    #[test]
    fn test_md060_preserve_alignment_indicators() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        let content = "| Left | Center | Right |\n|:---|:---:|---:|\n| A | B | C |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();

        assert!(fixed.contains(":---"), "Should contain left alignment");
        assert!(fixed.contains(":----:"), "Should contain center alignment");
        assert!(fixed.contains("----:"), "Should contain right alignment");
    }

    /// Columns are never narrower than three characters.
    #[test]
    fn test_md060_minimum_column_width() {
        let rule = MD060TableFormat::new(true, "aligned".to_string());

        // Test with very short column content to ensure minimum width of 3
        // GFM requires at least 3 dashes in delimiter rows
        let content = "| ID | Name |\n|-|-|\n| 1 | A |";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        let fixed = rule.fix(&ctx).unwrap();

        let lines: Vec<&str> = fixed.lines().collect();
        assert_eq!(lines[0].len(), lines[1].len());
        assert_eq!(lines[1].len(), lines[2].len());

        // Verify minimum width is enforced
        assert!(fixed.contains("ID "), "Short content should be padded");
        assert!(fixed.contains("---"), "Delimiter should have at least 3 dashes");
    }
}