rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).

/// Represents a table block in the document.
///
/// As filled in by `TableUtils::find_table_blocks_with_code_info`, every
/// position is a 0-based index into the lines of the scanned content.
#[derive(Debug, Clone)]
pub struct TableBlock {
    /// Index of the first line of the table (same line as the header row).
    pub start_line: usize,
    /// Index of the last line of the table, inclusive. Equals
    /// `delimiter_line` when the table has no body rows.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row (the line directly after the header).
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in order.
    pub content_lines: Vec<usize>,
}
14
/// Shared table detection utilities.
///
/// A unit struct that carries no data; it only serves as a namespace for the
/// associated functions defined in its `impl` block, which are called as
/// `TableUtils::function_name(...)`.
pub struct TableUtils;
17
18impl TableUtils {
19    /// Check if a line looks like a potential table row
20    pub fn is_potential_table_row(line: &str) -> bool {
21        let trimmed = line.trim();
22        if trimmed.is_empty() || !trimmed.contains('|') {
23            return false;
24        }
25
26        // Skip lines that are clearly not table rows
27        // Unordered list items with space or tab after marker
28        if trimmed.starts_with("- ")
29            || trimmed.starts_with("* ")
30            || trimmed.starts_with("+ ")
31            || trimmed.starts_with("-\t")
32            || trimmed.starts_with("*\t")
33            || trimmed.starts_with("+\t")
34        {
35            return false;
36        }
37
38        // Skip ordered list items: digits followed by . or ) then space/tab
39        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
40            && first_non_digit > 0
41        {
42            let after_digits = &trimmed[first_non_digit..];
43            if after_digits.starts_with(". ")
44                || after_digits.starts_with(".\t")
45                || after_digits.starts_with(") ")
46                || after_digits.starts_with(")\t")
47            {
48                return false;
49            }
50        }
51
52        // Skip lines that are clearly code or inline code
53        if trimmed.starts_with("`") || trimmed.contains("``") {
54            return false;
55        }
56
57        // Must have at least 2 parts when split by |
58        let parts: Vec<&str> = trimmed.split('|').collect();
59        if parts.len() < 2 {
60            return false;
61        }
62
63        // Check if it looks like a table row by having reasonable content between pipes
64        let mut valid_parts = 0;
65        let mut total_non_empty_parts = 0;
66
67        for part in &parts {
68            let part_trimmed = part.trim();
69            // Skip empty parts (from leading/trailing pipes)
70            if part_trimmed.is_empty() {
71                continue;
72            }
73            total_non_empty_parts += 1;
74
75            // Count parts that look like table cells (reasonable content, no newlines)
76            if !part_trimmed.contains('\n') {
77                valid_parts += 1;
78            }
79        }
80
81        // Check if all non-empty parts are valid (no newlines)
82        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
83            // Some cells contain newlines, not a valid table row
84            return false;
85        }
86
87        // GFM allows tables with all empty cells (e.g., |||)
88        // These are valid if they have proper table formatting (leading and trailing pipes)
89        if total_non_empty_parts == 0 {
90            // Empty cells are only valid with proper pipe formatting
91            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
92        }
93
94        // GFM allows single-column tables, so >= 1 valid part is enough
95        // when the line has proper table formatting (pipes)
96        if trimmed.starts_with('|') && trimmed.ends_with('|') {
97            // Properly formatted table row with pipes on both ends
98            valid_parts >= 1
99        } else {
100            // For rows without proper pipe formatting, require at least 2 cells
101            valid_parts >= 2
102        }
103    }
104
105    /// Check if a line is a table delimiter row (e.g., |---|---|)
106    pub fn is_delimiter_row(line: &str) -> bool {
107        let trimmed = line.trim();
108        if !trimmed.contains('|') || !trimmed.contains('-') {
109            return false;
110        }
111
112        // Split by pipes and check each part
113        let parts: Vec<&str> = trimmed.split('|').collect();
114        let mut valid_delimiter_parts = 0;
115        let mut total_non_empty_parts = 0;
116
117        for part in &parts {
118            let part_trimmed = part.trim();
119            if part_trimmed.is_empty() {
120                continue; // Skip empty parts from leading/trailing pipes
121            }
122
123            total_non_empty_parts += 1;
124
125            // Check if this part looks like a delimiter (contains dashes and optionally colons)
126            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
127                valid_delimiter_parts += 1;
128            }
129        }
130
131        // All non-empty parts must be valid delimiters, and there must be at least one
132        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
133    }
134
135    /// Find all table blocks in the content with optimized detection
136    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
137    pub fn find_table_blocks_with_code_info(
138        content: &str,
139        code_blocks: &[(usize, usize)],
140        code_spans: &[crate::lint_context::CodeSpan],
141        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
142    ) -> Vec<TableBlock> {
143        let lines: Vec<&str> = content.lines().collect();
144        let mut tables = Vec::new();
145        let mut i = 0;
146
147        // Pre-compute line positions for efficient code block checking
148        let mut line_positions = Vec::with_capacity(lines.len());
149        let mut pos = 0;
150        for line in &lines {
151            line_positions.push(pos);
152            pos += line.len() + 1; // +1 for newline
153        }
154
155        while i < lines.len() {
156            // Skip lines in code blocks, code spans, or HTML comments
157            let line_start = line_positions[i];
158            let in_code =
159                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
160                    || code_spans
161                        .iter()
162                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
163            let in_html_comment = html_comment_ranges
164                .iter()
165                .any(|range| line_start >= range.start && line_start < range.end);
166
167            if in_code || in_html_comment {
168                i += 1;
169                continue;
170            }
171
172            // Look for potential table start
173            if Self::is_potential_table_row(lines[i]) {
174                // Check if the next line is a delimiter row
175                if i + 1 < lines.len() && Self::is_delimiter_row(lines[i + 1]) {
176                    // Found a table! Find its end
177                    let table_start = i;
178                    let header_line = i;
179                    let delimiter_line = i + 1;
180                    let mut table_end = i + 1; // Include the delimiter row
181                    let mut content_lines = Vec::new();
182
183                    // Continue while we have table rows
184                    let mut j = i + 2;
185                    while j < lines.len() {
186                        let line = lines[j];
187                        if line.trim().is_empty() {
188                            // Empty line ends the table
189                            break;
190                        }
191                        if Self::is_potential_table_row(line) {
192                            content_lines.push(j);
193                            table_end = j;
194                            j += 1;
195                        } else {
196                            // Non-table line ends the table
197                            break;
198                        }
199                    }
200
201                    tables.push(TableBlock {
202                        start_line: table_start,
203                        end_line: table_end,
204                        header_line,
205                        delimiter_line,
206                        content_lines,
207                    });
208                    i = table_end + 1;
209                } else {
210                    i += 1;
211                }
212            } else {
213                i += 1;
214            }
215        }
216
217        tables
218    }
219
220    /// Find all table blocks in the content with optimized detection
221    /// This is a backward-compatible wrapper that accepts LintContext
222    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
223        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
224    }
225
226    /// Count the number of cells in a table row
227    pub fn count_cells(row: &str) -> usize {
228        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
229    }
230
231    /// Count the number of cells in a table row with flavor-specific behavior
232    ///
233    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
234    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
235    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
236        Self::split_table_row_with_flavor(row, flavor).len()
237    }
238
239    /// Mask pipes inside inline code blocks with a placeholder character
240    pub fn mask_pipes_in_inline_code(text: &str) -> String {
241        let mut result = String::new();
242        let chars: Vec<char> = text.chars().collect();
243        let mut i = 0;
244
245        while i < chars.len() {
246            if chars[i] == '`' {
247                // Count consecutive backticks at start
248                let start = i;
249                let mut backtick_count = 0;
250                while i < chars.len() && chars[i] == '`' {
251                    backtick_count += 1;
252                    i += 1;
253                }
254
255                // Look for matching closing backticks
256                let mut found_closing = false;
257                let mut j = i;
258
259                while j < chars.len() {
260                    if chars[j] == '`' {
261                        // Count potential closing backticks
262                        let close_start = j;
263                        let mut close_count = 0;
264                        while j < chars.len() && chars[j] == '`' {
265                            close_count += 1;
266                            j += 1;
267                        }
268
269                        if close_count == backtick_count {
270                            // Found matching closing backticks
271                            found_closing = true;
272
273                            // Valid inline code - add with pipes masked
274                            result.extend(chars[start..i].iter());
275
276                            for &ch in chars.iter().take(close_start).skip(i) {
277                                if ch == '|' {
278                                    result.push('_'); // Mask pipe with underscore
279                                } else {
280                                    result.push(ch);
281                                }
282                            }
283
284                            result.extend(chars[close_start..j].iter());
285                            i = j;
286                            break;
287                        }
288                        // If not matching, continue searching (j is already past these backticks)
289                    } else {
290                        j += 1;
291                    }
292                }
293
294                if !found_closing {
295                    // No matching closing found, treat as regular text
296                    result.extend(chars[start..i].iter());
297                }
298            } else {
299                result.push(chars[i]);
300                i += 1;
301            }
302        }
303
304        result
305    }
306
307    /// Escape pipes inside inline code blocks with backslash.
308    /// Converts `|` to `\|` inside backtick spans.
309    /// Used by auto-fix to preserve content while making tables valid.
310    pub fn escape_pipes_in_inline_code(text: &str) -> String {
311        let mut result = String::new();
312        let chars: Vec<char> = text.chars().collect();
313        let mut i = 0;
314
315        while i < chars.len() {
316            if chars[i] == '`' {
317                let start = i;
318                let mut backtick_count = 0;
319                while i < chars.len() && chars[i] == '`' {
320                    backtick_count += 1;
321                    i += 1;
322                }
323
324                let mut found_closing = false;
325                let mut j = i;
326
327                while j < chars.len() {
328                    if chars[j] == '`' {
329                        let close_start = j;
330                        let mut close_count = 0;
331                        while j < chars.len() && chars[j] == '`' {
332                            close_count += 1;
333                            j += 1;
334                        }
335
336                        if close_count == backtick_count {
337                            found_closing = true;
338                            result.extend(chars[start..i].iter());
339
340                            for &ch in chars.iter().take(close_start).skip(i) {
341                                if ch == '|' {
342                                    result.push('\\');
343                                    result.push('|');
344                                } else {
345                                    result.push(ch);
346                                }
347                            }
348
349                            result.extend(chars[close_start..j].iter());
350                            i = j;
351                            break;
352                        }
353                    } else {
354                        j += 1;
355                    }
356                }
357
358                if !found_closing {
359                    result.extend(chars[start..i].iter());
360                }
361            } else {
362                result.push(chars[i]);
363                i += 1;
364            }
365        }
366
367        result
368    }
369
370    /// Mask escaped pipes for accurate table cell parsing
371    ///
372    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
373    /// - `\|` → escaped pipe → masked (stays as cell content)
374    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
375    ///
376    /// IMPORTANT: Inline code spans do NOT protect pipes in GFM tables!
377    /// The pipe in `` `a | b` `` still acts as a cell delimiter, splitting into
378    /// two cells: `` `a `` and ` b` ``. This matches GitHub's actual rendering.
379    ///
380    /// To include a literal pipe in a table cell (even in code), you must escape it:
381    /// `` `a \| b` `` → single cell containing `a | b` (with code formatting)
382    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
383        let mut result = String::new();
384        let chars: Vec<char> = text.chars().collect();
385        let mut i = 0;
386
387        while i < chars.len() {
388            if chars[i] == '\\' {
389                if i + 1 < chars.len() && chars[i + 1] == '\\' {
390                    // Escaped backslash: \\ → push both and continue
391                    // The next character (if it's a pipe) will be a real delimiter
392                    result.push('\\');
393                    result.push('\\');
394                    i += 2;
395                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
396                    // Escaped pipe: \| → mask the pipe
397                    result.push('\\');
398                    result.push('_'); // Mask the pipe
399                    i += 2;
400                } else {
401                    // Single backslash not followed by \ or | → just push it
402                    result.push(chars[i]);
403                    i += 1;
404                }
405            } else {
406                result.push(chars[i]);
407                i += 1;
408            }
409        }
410
411        result
412    }
413
414    /// Split a table row into individual cell contents with flavor-specific behavior.
415    ///
416    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
417    /// This is the foundation for both cell counting and cell content extraction.
418    ///
419    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
420    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
421    pub fn split_table_row_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> Vec<String> {
422        let trimmed = row.trim();
423
424        if !trimmed.contains('|') {
425            return Vec::new();
426        }
427
428        // First, mask escaped pipes (same for all flavors)
429        let masked = Self::mask_pipes_for_table_parsing(trimmed);
430
431        // For MkDocs flavor, also mask pipes inside inline code
432        let final_masked = if flavor == crate::config::MarkdownFlavor::MkDocs {
433            Self::mask_pipes_in_inline_code(&masked)
434        } else {
435            masked
436        };
437
438        let has_leading = final_masked.starts_with('|');
439        let has_trailing = final_masked.ends_with('|');
440
441        let mut masked_content = final_masked.as_str();
442        let mut orig_content = trimmed;
443
444        if has_leading {
445            masked_content = &masked_content[1..];
446            orig_content = &orig_content[1..];
447        }
448
449        // Track whether we actually strip a trailing pipe
450        let stripped_trailing = has_trailing && !masked_content.is_empty();
451        if stripped_trailing {
452            masked_content = &masked_content[..masked_content.len() - 1];
453            orig_content = &orig_content[..orig_content.len() - 1];
454        }
455
456        // Handle edge cases for degenerate inputs
457        if masked_content.is_empty() {
458            if stripped_trailing {
459                // "||" case: two pipes with empty content between = one empty cell
460                return vec![String::new()];
461            } else {
462                // "|" case: single pipe, not a valid table row
463                return Vec::new();
464            }
465        }
466
467        let masked_parts: Vec<&str> = masked_content.split('|').collect();
468        let mut cells = Vec::new();
469        let mut pos = 0;
470
471        for masked_cell in masked_parts {
472            let cell_len = masked_cell.len();
473            let orig_cell = if pos + cell_len <= orig_content.len() {
474                &orig_content[pos..pos + cell_len]
475            } else {
476                masked_cell
477            };
478            cells.push(orig_cell.to_string());
479            pos += cell_len + 1; // +1 for the pipe delimiter
480        }
481
482        cells
483    }
484
485    /// Split a table row into individual cell contents using Standard/GFM behavior.
486    pub fn split_table_row(row: &str) -> Vec<String> {
487        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
488    }
489
490    /// Determine the pipe style of a table row
491    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
492        let trimmed = line.trim();
493        if !trimmed.contains('|') {
494            return None;
495        }
496
497        let has_leading = trimmed.starts_with('|');
498        let has_trailing = trimmed.ends_with('|');
499
500        match (has_leading, has_trailing) {
501            (true, true) => Some("leading_and_trailing"),
502            (true, false) => Some("leading_only"),
503            (false, true) => Some("trailing_only"),
504            (false, false) => Some("no_leading_or_trailing"),
505        }
506    }
507}
508
509#[cfg(test)]
510mod tests {
511    use super::*;
512    use crate::lint_context::LintContext;
513
514    #[test]
515    fn test_is_potential_table_row() {
516        // Basic valid table rows
517        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
518        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
519        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
520        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
521
522        // Multiple cells
523        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
524
525        // With whitespace
526        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
527        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
528
529        // Not table rows
530        assert!(!TableUtils::is_potential_table_row("- List item"));
531        assert!(!TableUtils::is_potential_table_row("* Another list"));
532        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
533        assert!(!TableUtils::is_potential_table_row("Regular text"));
534        assert!(!TableUtils::is_potential_table_row(""));
535        assert!(!TableUtils::is_potential_table_row("   "));
536
537        // Code blocks
538        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
539        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
540
541        // Single pipe not enough
542        assert!(!TableUtils::is_potential_table_row("Just one |"));
543        assert!(!TableUtils::is_potential_table_row("| Just one"));
544
545        // Very long cells are valid in tables (no length limit for cell content)
546        let long_cell = "a".repeat(150);
547        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
548
549        // Cells with newlines
550        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
551
552        // Empty cells (Issue #129)
553        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
554        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
555        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
556    }
557
558    #[test]
559    fn test_list_items_with_pipes_not_table_rows() {
560        // Ordered list items should NOT be detected as table rows
561        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
562        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
563        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
564        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
565        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
566
567        // Unordered list items with tabs
568        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
569        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
570        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
571
572        // Indented list items (the trim_start normalizes indentation)
573        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
574        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
575        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
576
577        // Task list items
578        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
579        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
580
581        // Multiple pipes in list items
582        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
583        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
584
585        // These SHOULD still be detected as potential table rows
586        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
587        assert!(TableUtils::is_potential_table_row("cell | cell"));
588        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
589    }
590
591    #[test]
592    fn test_is_delimiter_row() {
593        // Basic delimiter rows
594        assert!(TableUtils::is_delimiter_row("|---|---|"));
595        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
596        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
597        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
598
599        // With varying dash counts
600        assert!(TableUtils::is_delimiter_row("|-|--|"));
601        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
602
603        // With whitespace
604        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
605        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
606
607        // Multiple columns
608        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
609
610        // Without leading/trailing pipes
611        assert!(TableUtils::is_delimiter_row("--- | ---"));
612        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
613
614        // Not delimiter rows
615        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
616        assert!(!TableUtils::is_delimiter_row("Regular text"));
617        assert!(!TableUtils::is_delimiter_row(""));
618        assert!(!TableUtils::is_delimiter_row("|||"));
619        assert!(!TableUtils::is_delimiter_row("| | |"));
620
621        // Must have dashes
622        assert!(!TableUtils::is_delimiter_row("| : | : |"));
623        assert!(!TableUtils::is_delimiter_row("|    |    |"));
624
625        // Mixed content
626        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
627        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
628    }
629
630    #[test]
631    fn test_count_cells() {
632        // Basic counts
633        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
634        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
635        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
636        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
637
638        // Single cell
639        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
640        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
641
642        // Empty cells
643        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
644        assert_eq!(TableUtils::count_cells("| | | |"), 3);
645
646        // Many cells
647        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
648
649        // Edge cases
650        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
651        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
652
653        // No table
654        assert_eq!(TableUtils::count_cells("Regular text"), 0);
655        assert_eq!(TableUtils::count_cells(""), 0);
656        assert_eq!(TableUtils::count_cells("   "), 0);
657
658        // Whitespace handling
659        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
660        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
661    }
662
663    #[test]
664    fn test_count_cells_with_escaped_pipes() {
665        // In GFM tables, escape handling happens BEFORE cell splitting.
666        // Inline code does NOT protect pipes - they still act as cell delimiters.
667        // To include a literal pipe in a table cell, you MUST escape it with \|
668
669        // Basic table structure
670        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
671        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
672        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
673
674        // Escaped pipes: \| keeps the pipe as content
675        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
676        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
677
678        // Escaped pipes inside backticks (correct way to include | in code in tables)
679        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
680
681        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
682        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
683        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 3); // Same inside code
684
685        // IMPORTANT: Bare pipes in inline code DO act as delimiters (GFM behavior)
686        // This matches GitHub's actual rendering where `a | b` splits into two cells
687        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 3);
688        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 4);
689        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 2);
690
691        // The regex example from Issue #34 - pipes in regex patterns need escaping
692        // Unescaped: `^([0-1]?\d|2[0-3])` has a bare | which splits cells
693        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 3);
694        // Escaped: `^([0-1]?\d\|2[0-3])` keeps the | as part of the regex
695        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
696    }
697
698    #[test]
699    fn test_determine_pipe_style() {
700        // All pipe styles
701        assert_eq!(
702            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
703            Some("leading_and_trailing")
704        );
705        assert_eq!(
706            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
707            Some("leading_only")
708        );
709        assert_eq!(
710            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
711            Some("trailing_only")
712        );
713        assert_eq!(
714            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
715            Some("no_leading_or_trailing")
716        );
717
718        // With whitespace
719        assert_eq!(
720            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
721            Some("leading_and_trailing")
722        );
723        assert_eq!(
724            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
725            Some("leading_only")
726        );
727
728        // No pipes
729        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
730        assert_eq!(TableUtils::determine_pipe_style(""), None);
731        assert_eq!(TableUtils::determine_pipe_style("   "), None);
732
733        // Single pipe cases
734        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
735        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
736        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
737    }
738
739    #[test]
740    fn test_find_table_blocks_simple() {
741        let content = "| Header 1 | Header 2 |
742|-----------|-----------|
743| Cell 1    | Cell 2    |
744| Cell 3    | Cell 4    |";
745
746        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
747
748        let tables = TableUtils::find_table_blocks(content, &ctx);
749        assert_eq!(tables.len(), 1);
750
751        let table = &tables[0];
752        assert_eq!(table.start_line, 0);
753        assert_eq!(table.end_line, 3);
754        assert_eq!(table.header_line, 0);
755        assert_eq!(table.delimiter_line, 1);
756        assert_eq!(table.content_lines, vec![2, 3]);
757    }
758
759    #[test]
760    fn test_find_table_blocks_multiple() {
761        let content = "Some text
762
763| Table 1 | Col A |
764|----------|-------|
765| Data 1   | Val 1 |
766
767More text
768
769| Table 2 | Col 2 |
770|----------|-------|
771| Data 2   | Data  |";
772
773        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
774
775        let tables = TableUtils::find_table_blocks(content, &ctx);
776        assert_eq!(tables.len(), 2);
777
778        // First table
779        assert_eq!(tables[0].start_line, 2);
780        assert_eq!(tables[0].end_line, 4);
781        assert_eq!(tables[0].header_line, 2);
782        assert_eq!(tables[0].delimiter_line, 3);
783        assert_eq!(tables[0].content_lines, vec![4]);
784
785        // Second table
786        assert_eq!(tables[1].start_line, 8);
787        assert_eq!(tables[1].end_line, 10);
788        assert_eq!(tables[1].header_line, 8);
789        assert_eq!(tables[1].delimiter_line, 9);
790        assert_eq!(tables[1].content_lines, vec![10]);
791    }
792
793    #[test]
794    fn test_find_table_blocks_no_content_rows() {
795        let content = "| Header 1 | Header 2 |
796|-----------|-----------|
797
798Next paragraph";
799
800        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
801
802        let tables = TableUtils::find_table_blocks(content, &ctx);
803        assert_eq!(tables.len(), 1);
804
805        let table = &tables[0];
806        assert_eq!(table.start_line, 0);
807        assert_eq!(table.end_line, 1); // Just header and delimiter
808        assert_eq!(table.content_lines.len(), 0);
809    }
810
811    #[test]
812    fn test_find_table_blocks_in_code_block() {
813        let content = "```
814| Not | A | Table |
815|-----|---|-------|
816| In  | Code | Block |
817```
818
819| Real | Table |
820|------|-------|
821| Data | Here  |";
822
823        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
824
825        let tables = TableUtils::find_table_blocks(content, &ctx);
826        assert_eq!(tables.len(), 1); // Only the table outside code block
827
828        let table = &tables[0];
829        assert_eq!(table.header_line, 6);
830        assert_eq!(table.delimiter_line, 7);
831    }
832
833    #[test]
834    fn test_find_table_blocks_no_tables() {
835        let content = "Just regular text
836No tables here
837- List item with | pipe
838* Another list item";
839
840        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
841
842        let tables = TableUtils::find_table_blocks(content, &ctx);
843        assert_eq!(tables.len(), 0);
844    }
845
846    #[test]
847    fn test_find_table_blocks_malformed() {
848        let content = "| Header without delimiter |
849| This looks like table |
850But no delimiter row
851
852| Proper | Table |
853|---------|-------|
854| Data    | Here  |";
855
856        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
857
858        let tables = TableUtils::find_table_blocks(content, &ctx);
859        assert_eq!(tables.len(), 1); // Only the proper table
860        assert_eq!(tables[0].header_line, 4);
861    }
862
863    #[test]
864    fn test_edge_cases() {
865        // Test empty content
866        assert!(!TableUtils::is_potential_table_row(""));
867        assert!(!TableUtils::is_delimiter_row(""));
868        assert_eq!(TableUtils::count_cells(""), 0);
869        assert_eq!(TableUtils::determine_pipe_style(""), None);
870
871        // Test whitespace only
872        assert!(!TableUtils::is_potential_table_row("   "));
873        assert!(!TableUtils::is_delimiter_row("   "));
874        assert_eq!(TableUtils::count_cells("   "), 0);
875        assert_eq!(TableUtils::determine_pipe_style("   "), None);
876
877        // Test single character
878        assert!(!TableUtils::is_potential_table_row("|"));
879        assert!(!TableUtils::is_delimiter_row("|"));
880        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
881
882        // Test very long lines are valid table rows (no length limit)
883        // Test both single-column and multi-column long lines
884        let long_single = format!("| {} |", "a".repeat(200));
885        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
886
887        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
888        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
889
890        // Test unicode
891        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
892        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
893        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
894    }
895
896    #[test]
897    fn test_table_block_struct() {
898        let block = TableBlock {
899            start_line: 0,
900            end_line: 5,
901            header_line: 0,
902            delimiter_line: 1,
903            content_lines: vec![2, 3, 4, 5],
904        };
905
906        // Test Debug trait
907        let debug_str = format!("{block:?}");
908        assert!(debug_str.contains("TableBlock"));
909        assert!(debug_str.contains("start_line: 0"));
910
911        // Test Clone trait
912        let cloned = block.clone();
913        assert_eq!(cloned.start_line, block.start_line);
914        assert_eq!(cloned.end_line, block.end_line);
915        assert_eq!(cloned.header_line, block.header_line);
916        assert_eq!(cloned.delimiter_line, block.delimiter_line);
917        assert_eq!(cloned.content_lines, block.content_lines);
918    }
919
920    #[test]
921    fn test_split_table_row() {
922        // Basic split
923        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
924        assert_eq!(cells.len(), 3);
925        assert_eq!(cells[0].trim(), "Cell 1");
926        assert_eq!(cells[1].trim(), "Cell 2");
927        assert_eq!(cells[2].trim(), "Cell 3");
928
929        // Without trailing pipe
930        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
931        assert_eq!(cells.len(), 2);
932
933        // Empty cells
934        let cells = TableUtils::split_table_row("| | | |");
935        assert_eq!(cells.len(), 3);
936
937        // Single cell
938        let cells = TableUtils::split_table_row("| Cell |");
939        assert_eq!(cells.len(), 1);
940        assert_eq!(cells[0].trim(), "Cell");
941
942        // No pipes
943        let cells = TableUtils::split_table_row("No pipes here");
944        assert_eq!(cells.len(), 0);
945    }
946
947    #[test]
948    fn test_split_table_row_with_escaped_pipes() {
949        // Escaped pipes should be preserved in cell content
950        let cells = TableUtils::split_table_row(r"| A | B \| C |");
951        assert_eq!(cells.len(), 2);
952        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
953
954        // Double backslash + pipe is NOT escaped
955        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
956        assert_eq!(cells.len(), 3);
957    }
958
959    #[test]
960    fn test_split_table_row_with_flavor_mkdocs() {
961        // MkDocs flavor: pipes in inline code are NOT cell delimiters
962        let cells =
963            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
964        assert_eq!(cells.len(), 2);
965        assert!(
966            cells[1].contains("`x | y`"),
967            "Inline code with pipe should be single cell in MkDocs flavor"
968        );
969
970        // Multiple pipes in inline code
971        let cells =
972            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
973        assert_eq!(cells.len(), 2);
974        assert!(cells[1].contains("`a | b | c`"));
975    }
976
977    #[test]
978    fn test_split_table_row_with_flavor_standard() {
979        // Standard/GFM flavor: pipes in inline code ARE cell delimiters
980        let cells =
981            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
982        // In GFM, `x | y` splits into separate cells
983        assert_eq!(cells.len(), 3);
984    }
985}