rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).

/// Location of one table inside a document.
///
/// All fields are 0-based indices into the document's lines (the order
/// produced by `str::lines`), and ranges are inclusive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableBlock {
    /// Index of the first line of the table (same line as the header row).
    pub start_line: usize,
    /// Index of the last line of the table; equals `delimiter_line` when the
    /// table has no body rows.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row, directly below the header row.
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in document order.
    pub content_lines: Vec<usize>,
}
14
/// Namespace type for the shared table helpers.
///
/// It carries no data; the table detection and inspection routines are
/// exposed as associated functions on it.
pub struct TableUtils;
17
18impl TableUtils {
19    /// Check if a line looks like a potential table row
20    pub fn is_potential_table_row(line: &str) -> bool {
21        let trimmed = line.trim();
22        if trimmed.is_empty() || !trimmed.contains('|') {
23            return false;
24        }
25
26        // Skip lines that are clearly not table rows
27        if trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") {
28            return false;
29        }
30
31        // Skip lines that are clearly code or inline code
32        if trimmed.starts_with("`") || trimmed.contains("``") {
33            return false;
34        }
35
36        // Must have at least 2 parts when split by |
37        let parts: Vec<&str> = trimmed.split('|').collect();
38        if parts.len() < 2 {
39            return false;
40        }
41
42        // Check if it looks like a table row by having reasonable content between pipes
43        let mut valid_parts = 0;
44        let mut total_non_empty_parts = 0;
45
46        for part in &parts {
47            let part_trimmed = part.trim();
48            // Skip empty parts (from leading/trailing pipes)
49            if part_trimmed.is_empty() {
50                continue;
51            }
52            total_non_empty_parts += 1;
53
54            // Count parts that look like table cells (reasonable content, no newlines)
55            if !part_trimmed.contains('\n') {
56                valid_parts += 1;
57            }
58        }
59
60        // All non-empty parts must be valid (no newlines) for the row to be valid
61        if total_non_empty_parts == 0 {
62            return false;
63        }
64
65        if valid_parts != total_non_empty_parts {
66            // Some cells contain newlines, not a valid table row
67            return false;
68        }
69
70        // GFM allows single-column tables, so >= 1 valid part is enough
71        // when the line has proper table formatting (pipes)
72        if trimmed.starts_with('|') && trimmed.ends_with('|') {
73            // Properly formatted table row with pipes on both ends
74            valid_parts >= 1
75        } else {
76            // For rows without proper pipe formatting, require at least 2 cells
77            valid_parts >= 2
78        }
79    }
80
81    /// Check if a line is a table delimiter row (e.g., |---|---|)
82    pub fn is_delimiter_row(line: &str) -> bool {
83        let trimmed = line.trim();
84        if !trimmed.contains('|') || !trimmed.contains('-') {
85            return false;
86        }
87
88        // Split by pipes and check each part
89        let parts: Vec<&str> = trimmed.split('|').collect();
90        let mut valid_delimiter_parts = 0;
91
92        for part in &parts {
93            let part_trimmed = part.trim();
94            if part_trimmed.is_empty() {
95                continue; // Skip empty parts from leading/trailing pipes
96            }
97
98            // Check if this part looks like a delimiter (contains dashes and optionally colons)
99            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
100                valid_delimiter_parts += 1;
101            }
102        }
103
104        valid_delimiter_parts >= 2
105    }
106
107    /// Find all table blocks in the content with optimized detection
108    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
109        let lines: Vec<&str> = content.lines().collect();
110        let mut tables = Vec::new();
111        let mut i = 0;
112
113        // Pre-compute line positions for efficient code block checking
114        let mut line_positions = Vec::with_capacity(lines.len());
115        let mut pos = 0;
116        for line in &lines {
117            line_positions.push(pos);
118            pos += line.len() + 1; // +1 for newline
119        }
120
121        while i < lines.len() {
122            // Skip lines in code blocks using cached code blocks from context
123            let line_start = line_positions[i];
124            if ctx.is_in_code_block_or_span(line_start) {
125                i += 1;
126                continue;
127            }
128
129            // Look for potential table start
130            if Self::is_potential_table_row(lines[i]) {
131                // Check if the next line is a delimiter row
132                if i + 1 < lines.len() && Self::is_delimiter_row(lines[i + 1]) {
133                    // Found a table! Find its end
134                    let table_start = i;
135                    let header_line = i;
136                    let delimiter_line = i + 1;
137                    let mut table_end = i + 1; // Include the delimiter row
138                    let mut content_lines = Vec::new();
139
140                    // Continue while we have table rows
141                    let mut j = i + 2;
142                    while j < lines.len() {
143                        let line = lines[j];
144                        if line.trim().is_empty() {
145                            // Empty line ends the table
146                            break;
147                        }
148                        if Self::is_potential_table_row(line) {
149                            content_lines.push(j);
150                            table_end = j;
151                            j += 1;
152                        } else {
153                            // Non-table line ends the table
154                            break;
155                        }
156                    }
157
158                    tables.push(TableBlock {
159                        start_line: table_start,
160                        end_line: table_end,
161                        header_line,
162                        delimiter_line,
163                        content_lines,
164                    });
165                    i = table_end + 1;
166                } else {
167                    i += 1;
168                }
169            } else {
170                i += 1;
171            }
172        }
173
174        tables
175    }
176
177    /// Count the number of cells in a table row
178    pub fn count_cells(row: &str) -> usize {
179        let trimmed = row.trim();
180
181        // Skip non-table rows
182        if !trimmed.contains('|') {
183            return 0;
184        }
185
186        // Users shouldn't have to escape pipes in regex patterns, etc.
187        let masked_row = Self::mask_pipes_in_inline_code(trimmed);
188
189        // Handle case with leading/trailing pipes
190        let mut cell_count = 0;
191        let parts: Vec<&str> = masked_row.split('|').collect();
192
193        for (i, part) in parts.iter().enumerate() {
194            // Skip first part if it's empty and there's a leading pipe
195            if i == 0 && part.trim().is_empty() && parts.len() > 1 {
196                continue;
197            }
198
199            // Skip last part if it's empty and there's a trailing pipe
200            if i == parts.len() - 1 && part.trim().is_empty() && parts.len() > 1 {
201                continue;
202            }
203
204            cell_count += 1;
205        }
206
207        cell_count
208    }
209
210    /// Mask pipes inside inline code blocks with a placeholder character
211    fn mask_pipes_in_inline_code(text: &str) -> String {
212        let mut result = String::new();
213        let chars: Vec<char> = text.chars().collect();
214        let mut i = 0;
215
216        while i < chars.len() {
217            if chars[i] == '`' {
218                // Count consecutive backticks at start
219                let start = i;
220                let mut backtick_count = 0;
221                while i < chars.len() && chars[i] == '`' {
222                    backtick_count += 1;
223                    i += 1;
224                }
225
226                // Look for matching closing backticks
227                let mut found_closing = false;
228                let mut j = i;
229
230                while j < chars.len() {
231                    if chars[j] == '`' {
232                        // Count potential closing backticks
233                        let close_start = j;
234                        let mut close_count = 0;
235                        while j < chars.len() && chars[j] == '`' {
236                            close_count += 1;
237                            j += 1;
238                        }
239
240                        if close_count == backtick_count {
241                            // Found matching closing backticks
242                            found_closing = true;
243
244                            // Valid inline code - add with pipes masked
245                            result.extend(chars[start..i].iter());
246
247                            for &ch in chars.iter().take(close_start).skip(i) {
248                                if ch == '|' {
249                                    result.push('_'); // Mask pipe with underscore
250                                } else {
251                                    result.push(ch);
252                                }
253                            }
254
255                            result.extend(chars[close_start..j].iter());
256                            i = j;
257                            break;
258                        }
259                        // If not matching, continue searching (j is already past these backticks)
260                    } else {
261                        j += 1;
262                    }
263                }
264
265                if !found_closing {
266                    // No matching closing found, treat as regular text
267                    result.extend(chars[start..i].iter());
268                }
269            } else {
270                result.push(chars[i]);
271                i += 1;
272            }
273        }
274
275        result
276    }
277
278    /// Determine the pipe style of a table row
279    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
280        let trimmed = line.trim();
281        if !trimmed.contains('|') {
282            return None;
283        }
284
285        let has_leading = trimmed.starts_with('|');
286        let has_trailing = trimmed.ends_with('|');
287
288        match (has_leading, has_trailing) {
289            (true, true) => Some("leading_and_trailing"),
290            (true, false) => Some("leading_only"),
291            (false, true) => Some("trailing_only"),
292            (false, false) => Some("no_leading_or_trailing"),
293        }
294    }
295}
296
297#[cfg(test)]
298mod tests {
299    use super::*;
300    use crate::lint_context::LintContext;
301
302    #[test]
303    fn test_is_potential_table_row() {
304        // Basic valid table rows
305        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
306        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
307        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
308        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
309
310        // Multiple cells
311        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
312
313        // With whitespace
314        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
315        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
316
317        // Not table rows
318        assert!(!TableUtils::is_potential_table_row("- List item"));
319        assert!(!TableUtils::is_potential_table_row("* Another list"));
320        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
321        assert!(!TableUtils::is_potential_table_row("Regular text"));
322        assert!(!TableUtils::is_potential_table_row(""));
323        assert!(!TableUtils::is_potential_table_row("   "));
324
325        // Code blocks
326        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
327        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
328
329        // Single pipe not enough
330        assert!(!TableUtils::is_potential_table_row("Just one |"));
331        assert!(!TableUtils::is_potential_table_row("| Just one"));
332
333        // Very long cells are valid in tables (no length limit for cell content)
334        let long_cell = "a".repeat(150);
335        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
336
337        // Cells with newlines
338        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
339    }
340
341    #[test]
342    fn test_is_delimiter_row() {
343        // Basic delimiter rows
344        assert!(TableUtils::is_delimiter_row("|---|---|"));
345        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
346        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
347        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
348
349        // With varying dash counts
350        assert!(TableUtils::is_delimiter_row("|-|--|"));
351        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
352
353        // With whitespace
354        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
355        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
356
357        // Multiple columns
358        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
359
360        // Without leading/trailing pipes
361        assert!(TableUtils::is_delimiter_row("--- | ---"));
362        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
363
364        // Not delimiter rows
365        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
366        assert!(!TableUtils::is_delimiter_row("Regular text"));
367        assert!(!TableUtils::is_delimiter_row(""));
368        assert!(!TableUtils::is_delimiter_row("|||"));
369        assert!(!TableUtils::is_delimiter_row("| | |"));
370
371        // Must have dashes
372        assert!(!TableUtils::is_delimiter_row("| : | : |"));
373        assert!(!TableUtils::is_delimiter_row("|    |    |"));
374
375        // Mixed content
376        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
377        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
378    }
379
380    #[test]
381    fn test_count_cells() {
382        // Basic counts
383        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
384        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
385        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
386        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
387
388        // Single cell
389        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
390        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
391
392        // Empty cells
393        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
394        assert_eq!(TableUtils::count_cells("| | | |"), 3);
395
396        // Many cells
397        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
398
399        // Edge cases
400        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
401        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
402
403        // No table
404        assert_eq!(TableUtils::count_cells("Regular text"), 0);
405        assert_eq!(TableUtils::count_cells(""), 0);
406        assert_eq!(TableUtils::count_cells("   "), 0);
407
408        // Whitespace handling
409        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
410        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
411    }
412
413    #[test]
414    fn test_count_cells_with_inline_code() {
415        // Test the user's actual example from Issue #34
416        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
417        assert_eq!(
418            TableUtils::count_cells("| Hour:minute:second formats | `^([0-1]?\\d|2[0-3]):[0-5]\\d:[0-5]\\d$` |"),
419            2
420        );
421
422        // Test basic inline code with pipes
423        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 2);
424        assert_eq!(TableUtils::count_cells("| A | `code | with | pipes` | B |"), 3);
425
426        // Test escaped pipes (correct GFM)
427        assert_eq!(TableUtils::count_cells("| Command | `echo \\| grep` |"), 2);
428
429        // Test multiple inline code blocks
430        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 2);
431
432        // Test edge cases
433        assert_eq!(TableUtils::count_cells("| Empty inline | `` | cell |"), 3);
434        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 1);
435
436        // Test that basic table structure still works
437        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
438        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
439    }
440
441    #[test]
442    fn test_determine_pipe_style() {
443        // All pipe styles
444        assert_eq!(
445            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
446            Some("leading_and_trailing")
447        );
448        assert_eq!(
449            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
450            Some("leading_only")
451        );
452        assert_eq!(
453            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
454            Some("trailing_only")
455        );
456        assert_eq!(
457            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
458            Some("no_leading_or_trailing")
459        );
460
461        // With whitespace
462        assert_eq!(
463            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
464            Some("leading_and_trailing")
465        );
466        assert_eq!(
467            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
468            Some("leading_only")
469        );
470
471        // No pipes
472        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
473        assert_eq!(TableUtils::determine_pipe_style(""), None);
474        assert_eq!(TableUtils::determine_pipe_style("   "), None);
475
476        // Single pipe cases
477        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
478        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
479        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
480    }
481
482    #[test]
483    fn test_find_table_blocks_simple() {
484        let content = "| Header 1 | Header 2 |
485|-----------|-----------|
486| Cell 1    | Cell 2    |
487| Cell 3    | Cell 4    |";
488
489        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
490
491        let tables = TableUtils::find_table_blocks(content, &ctx);
492        assert_eq!(tables.len(), 1);
493
494        let table = &tables[0];
495        assert_eq!(table.start_line, 0);
496        assert_eq!(table.end_line, 3);
497        assert_eq!(table.header_line, 0);
498        assert_eq!(table.delimiter_line, 1);
499        assert_eq!(table.content_lines, vec![2, 3]);
500    }
501
502    #[test]
503    fn test_find_table_blocks_multiple() {
504        let content = "Some text
505
506| Table 1 | Col A |
507|----------|-------|
508| Data 1   | Val 1 |
509
510More text
511
512| Table 2 | Col 2 |
513|----------|-------|
514| Data 2   | Data  |";
515
516        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
517
518        let tables = TableUtils::find_table_blocks(content, &ctx);
519        assert_eq!(tables.len(), 2);
520
521        // First table
522        assert_eq!(tables[0].start_line, 2);
523        assert_eq!(tables[0].end_line, 4);
524        assert_eq!(tables[0].header_line, 2);
525        assert_eq!(tables[0].delimiter_line, 3);
526        assert_eq!(tables[0].content_lines, vec![4]);
527
528        // Second table
529        assert_eq!(tables[1].start_line, 8);
530        assert_eq!(tables[1].end_line, 10);
531        assert_eq!(tables[1].header_line, 8);
532        assert_eq!(tables[1].delimiter_line, 9);
533        assert_eq!(tables[1].content_lines, vec![10]);
534    }
535
536    #[test]
537    fn test_find_table_blocks_no_content_rows() {
538        let content = "| Header 1 | Header 2 |
539|-----------|-----------|
540
541Next paragraph";
542
543        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
544
545        let tables = TableUtils::find_table_blocks(content, &ctx);
546        assert_eq!(tables.len(), 1);
547
548        let table = &tables[0];
549        assert_eq!(table.start_line, 0);
550        assert_eq!(table.end_line, 1); // Just header and delimiter
551        assert_eq!(table.content_lines.len(), 0);
552    }
553
554    #[test]
555    fn test_find_table_blocks_in_code_block() {
556        let content = "```
557| Not | A | Table |
558|-----|---|-------|
559| In  | Code | Block |
560```
561
562| Real | Table |
563|------|-------|
564| Data | Here  |";
565
566        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
567
568        let tables = TableUtils::find_table_blocks(content, &ctx);
569        assert_eq!(tables.len(), 1); // Only the table outside code block
570
571        let table = &tables[0];
572        assert_eq!(table.header_line, 6);
573        assert_eq!(table.delimiter_line, 7);
574    }
575
576    #[test]
577    fn test_find_table_blocks_no_tables() {
578        let content = "Just regular text
579No tables here
580- List item with | pipe
581* Another list item";
582
583        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
584
585        let tables = TableUtils::find_table_blocks(content, &ctx);
586        assert_eq!(tables.len(), 0);
587    }
588
589    #[test]
590    fn test_find_table_blocks_malformed() {
591        let content = "| Header without delimiter |
592| This looks like table |
593But no delimiter row
594
595| Proper | Table |
596|---------|-------|
597| Data    | Here  |";
598
599        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
600
601        let tables = TableUtils::find_table_blocks(content, &ctx);
602        assert_eq!(tables.len(), 1); // Only the proper table
603        assert_eq!(tables[0].header_line, 4);
604    }
605
606    #[test]
607    fn test_edge_cases() {
608        // Test empty content
609        assert!(!TableUtils::is_potential_table_row(""));
610        assert!(!TableUtils::is_delimiter_row(""));
611        assert_eq!(TableUtils::count_cells(""), 0);
612        assert_eq!(TableUtils::determine_pipe_style(""), None);
613
614        // Test whitespace only
615        assert!(!TableUtils::is_potential_table_row("   "));
616        assert!(!TableUtils::is_delimiter_row("   "));
617        assert_eq!(TableUtils::count_cells("   "), 0);
618        assert_eq!(TableUtils::determine_pipe_style("   "), None);
619
620        // Test single character
621        assert!(!TableUtils::is_potential_table_row("|"));
622        assert!(!TableUtils::is_delimiter_row("|"));
623        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
624
625        // Test very long lines are valid table rows (no length limit)
626        // Test both single-column and multi-column long lines
627        let long_single = format!("| {} |", "a".repeat(200));
628        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
629
630        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
631        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
632
633        // Test unicode
634        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
635        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
636        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
637    }
638
639    #[test]
640    fn test_table_block_struct() {
641        let block = TableBlock {
642            start_line: 0,
643            end_line: 5,
644            header_line: 0,
645            delimiter_line: 1,
646            content_lines: vec![2, 3, 4, 5],
647        };
648
649        // Test Debug trait
650        let debug_str = format!("{block:?}");
651        assert!(debug_str.contains("TableBlock"));
652        assert!(debug_str.contains("start_line: 0"));
653
654        // Test Clone trait
655        let cloned = block.clone();
656        assert_eq!(cloned.start_line, block.start_line);
657        assert_eq!(cloned.end_line, block.end_line);
658        assert_eq!(cloned.header_line, block.header_line);
659        assert_eq!(cloned.delimiter_line, block.delimiter_line);
660        assert_eq!(cloned.content_lines, block.content_lines);
661    }
662}