rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).

/// Represents a table block in the document.
///
/// As produced by `TableUtils::find_table_blocks_with_code_info`, every field
/// is a zero-based index into the lines of the scanned content.
#[derive(Debug, Clone)]
pub struct TableBlock {
    /// Index of the first line of the table (the header row).
    pub start_line: usize,
    /// Index of the last line of the table: the last body row, or the
    /// delimiter row when the table has no body rows.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row (the line directly after the header).
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in order.
    pub content_lines: Vec<usize>,
}
14
/// Shared table detection utilities.
///
/// A unit struct used purely as a namespace: all functionality is exposed as
/// associated functions that take the text to inspect as an argument.
pub struct TableUtils;
17
18impl TableUtils {
19    /// Check if a line looks like a potential table row
20    pub fn is_potential_table_row(line: &str) -> bool {
21        let trimmed = line.trim();
22        if trimmed.is_empty() || !trimmed.contains('|') {
23            return false;
24        }
25
26        // Skip lines that are clearly not table rows
27        // Unordered list items with space or tab after marker
28        if trimmed.starts_with("- ")
29            || trimmed.starts_with("* ")
30            || trimmed.starts_with("+ ")
31            || trimmed.starts_with("-\t")
32            || trimmed.starts_with("*\t")
33            || trimmed.starts_with("+\t")
34        {
35            return false;
36        }
37
38        // Skip ordered list items: digits followed by . or ) then space/tab
39        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
40            && first_non_digit > 0
41        {
42            let after_digits = &trimmed[first_non_digit..];
43            if after_digits.starts_with(". ")
44                || after_digits.starts_with(".\t")
45                || after_digits.starts_with(") ")
46                || after_digits.starts_with(")\t")
47            {
48                return false;
49            }
50        }
51
52        // Skip lines that are clearly code or inline code
53        if trimmed.starts_with("`") || trimmed.contains("``") {
54            return false;
55        }
56
57        // Must have at least 2 parts when split by |
58        let parts: Vec<&str> = trimmed.split('|').collect();
59        if parts.len() < 2 {
60            return false;
61        }
62
63        // Check if it looks like a table row by having reasonable content between pipes
64        let mut valid_parts = 0;
65        let mut total_non_empty_parts = 0;
66
67        for part in &parts {
68            let part_trimmed = part.trim();
69            // Skip empty parts (from leading/trailing pipes)
70            if part_trimmed.is_empty() {
71                continue;
72            }
73            total_non_empty_parts += 1;
74
75            // Count parts that look like table cells (reasonable content, no newlines)
76            if !part_trimmed.contains('\n') {
77                valid_parts += 1;
78            }
79        }
80
81        // Check if all non-empty parts are valid (no newlines)
82        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
83            // Some cells contain newlines, not a valid table row
84            return false;
85        }
86
87        // GFM allows tables with all empty cells (e.g., |||)
88        // These are valid if they have proper table formatting (leading and trailing pipes)
89        if total_non_empty_parts == 0 {
90            // Empty cells are only valid with proper pipe formatting
91            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
92        }
93
94        // GFM allows single-column tables, so >= 1 valid part is enough
95        // when the line has proper table formatting (pipes)
96        if trimmed.starts_with('|') && trimmed.ends_with('|') {
97            // Properly formatted table row with pipes on both ends
98            valid_parts >= 1
99        } else {
100            // For rows without proper pipe formatting, require at least 2 cells
101            valid_parts >= 2
102        }
103    }
104
105    /// Check if a line is a table delimiter row (e.g., |---|---|)
106    pub fn is_delimiter_row(line: &str) -> bool {
107        let trimmed = line.trim();
108        if !trimmed.contains('|') || !trimmed.contains('-') {
109            return false;
110        }
111
112        // Split by pipes and check each part
113        let parts: Vec<&str> = trimmed.split('|').collect();
114        let mut valid_delimiter_parts = 0;
115        let mut total_non_empty_parts = 0;
116
117        for part in &parts {
118            let part_trimmed = part.trim();
119            if part_trimmed.is_empty() {
120                continue; // Skip empty parts from leading/trailing pipes
121            }
122
123            total_non_empty_parts += 1;
124
125            // Check if this part looks like a delimiter (contains dashes and optionally colons)
126            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
127                valid_delimiter_parts += 1;
128            }
129        }
130
131        // All non-empty parts must be valid delimiters, and there must be at least one
132        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
133    }
134
135    /// Find all table blocks in the content with optimized detection
136    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
137    pub fn find_table_blocks_with_code_info(
138        content: &str,
139        code_blocks: &[(usize, usize)],
140        code_spans: &[crate::lint_context::CodeSpan],
141        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
142    ) -> Vec<TableBlock> {
143        let lines: Vec<&str> = content.lines().collect();
144        let mut tables = Vec::new();
145        let mut i = 0;
146
147        // Pre-compute line positions for efficient code block checking
148        let mut line_positions = Vec::with_capacity(lines.len());
149        let mut pos = 0;
150        for line in &lines {
151            line_positions.push(pos);
152            pos += line.len() + 1; // +1 for newline
153        }
154
155        while i < lines.len() {
156            // Skip lines in code blocks, code spans, or HTML comments
157            let line_start = line_positions[i];
158            let in_code =
159                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
160                    || code_spans
161                        .iter()
162                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
163            let in_html_comment = html_comment_ranges
164                .iter()
165                .any(|range| line_start >= range.start && line_start < range.end);
166
167            if in_code || in_html_comment {
168                i += 1;
169                continue;
170            }
171
172            // Look for potential table start
173            if Self::is_potential_table_row(lines[i]) {
174                // Check if the next line is a delimiter row
175                if i + 1 < lines.len() && Self::is_delimiter_row(lines[i + 1]) {
176                    // Found a table! Find its end
177                    let table_start = i;
178                    let header_line = i;
179                    let delimiter_line = i + 1;
180                    let mut table_end = i + 1; // Include the delimiter row
181                    let mut content_lines = Vec::new();
182
183                    // Continue while we have table rows
184                    let mut j = i + 2;
185                    while j < lines.len() {
186                        let line = lines[j];
187                        if line.trim().is_empty() {
188                            // Empty line ends the table
189                            break;
190                        }
191                        if Self::is_potential_table_row(line) {
192                            content_lines.push(j);
193                            table_end = j;
194                            j += 1;
195                        } else {
196                            // Non-table line ends the table
197                            break;
198                        }
199                    }
200
201                    tables.push(TableBlock {
202                        start_line: table_start,
203                        end_line: table_end,
204                        header_line,
205                        delimiter_line,
206                        content_lines,
207                    });
208                    i = table_end + 1;
209                } else {
210                    i += 1;
211                }
212            } else {
213                i += 1;
214            }
215        }
216
217        tables
218    }
219
220    /// Find all table blocks in the content with optimized detection
221    /// This is a backward-compatible wrapper that accepts LintContext
222    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
223        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
224    }
225
226    /// Count the number of cells in a table row
227    /// Uses Standard/GFM behavior where pipes in inline code ARE cell delimiters
228    pub fn count_cells(row: &str) -> usize {
229        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
230    }
231
232    /// Count the number of cells in a table row with flavor-specific behavior
233    ///
234    /// Different Markdown flavors handle pipes inside inline code differently:
235    /// - Standard/GFM: Pipes in backticks ARE cell delimiters (GitHub behavior)
236    /// - MkDocs: Pipes in backticks are NOT cell delimiters (Python-Markdown behavior)
237    ///
238    /// This difference is due to Python-Markdown (used by MkDocs) fixing the parsing
239    /// to handle inline code spans before splitting by pipes, while GitHub GFM
240    /// splits by pipes first.
241    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
242        Self::split_table_row_with_flavor(row, flavor).len()
243    }
244
245    /// Mask pipes inside inline code blocks with a placeholder character
246    pub fn mask_pipes_in_inline_code(text: &str) -> String {
247        let mut result = String::new();
248        let chars: Vec<char> = text.chars().collect();
249        let mut i = 0;
250
251        while i < chars.len() {
252            if chars[i] == '`' {
253                // Count consecutive backticks at start
254                let start = i;
255                let mut backtick_count = 0;
256                while i < chars.len() && chars[i] == '`' {
257                    backtick_count += 1;
258                    i += 1;
259                }
260
261                // Look for matching closing backticks
262                let mut found_closing = false;
263                let mut j = i;
264
265                while j < chars.len() {
266                    if chars[j] == '`' {
267                        // Count potential closing backticks
268                        let close_start = j;
269                        let mut close_count = 0;
270                        while j < chars.len() && chars[j] == '`' {
271                            close_count += 1;
272                            j += 1;
273                        }
274
275                        if close_count == backtick_count {
276                            // Found matching closing backticks
277                            found_closing = true;
278
279                            // Valid inline code - add with pipes masked
280                            result.extend(chars[start..i].iter());
281
282                            for &ch in chars.iter().take(close_start).skip(i) {
283                                if ch == '|' {
284                                    result.push('_'); // Mask pipe with underscore
285                                } else {
286                                    result.push(ch);
287                                }
288                            }
289
290                            result.extend(chars[close_start..j].iter());
291                            i = j;
292                            break;
293                        }
294                        // If not matching, continue searching (j is already past these backticks)
295                    } else {
296                        j += 1;
297                    }
298                }
299
300                if !found_closing {
301                    // No matching closing found, treat as regular text
302                    result.extend(chars[start..i].iter());
303                }
304            } else {
305                result.push(chars[i]);
306                i += 1;
307            }
308        }
309
310        result
311    }
312
313    /// Mask escaped pipes for accurate table cell parsing
314    ///
315    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
316    /// - `\|` → escaped pipe → masked (stays as cell content)
317    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
318    ///
319    /// IMPORTANT: Inline code spans do NOT protect pipes in GFM tables!
320    /// The pipe in `` `a | b` `` still acts as a cell delimiter, splitting into
321    /// two cells: `` `a `` and ` b` ``. This matches GitHub's actual rendering.
322    ///
323    /// To include a literal pipe in a table cell (even in code), you must escape it:
324    /// `` `a \| b` `` → single cell containing `a | b` (with code formatting)
325    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
326        let mut result = String::new();
327        let chars: Vec<char> = text.chars().collect();
328        let mut i = 0;
329
330        while i < chars.len() {
331            if chars[i] == '\\' {
332                if i + 1 < chars.len() && chars[i + 1] == '\\' {
333                    // Escaped backslash: \\ → push both and continue
334                    // The next character (if it's a pipe) will be a real delimiter
335                    result.push('\\');
336                    result.push('\\');
337                    i += 2;
338                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
339                    // Escaped pipe: \| → mask the pipe
340                    result.push('\\');
341                    result.push('_'); // Mask the pipe
342                    i += 2;
343                } else {
344                    // Single backslash not followed by \ or | → just push it
345                    result.push(chars[i]);
346                    i += 1;
347                }
348            } else {
349                result.push(chars[i]);
350                i += 1;
351            }
352        }
353
354        result
355    }
356
357    /// Split a table row into individual cell contents with flavor-specific behavior.
358    ///
359    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
360    /// This is the foundation for both cell counting and cell content extraction.
361    ///
362    /// Different Markdown flavors handle pipes inside inline code differently:
363    /// - Standard/GFM: Pipes in backticks ARE cell delimiters (GitHub behavior)
364    /// - MkDocs: Pipes in backticks are NOT cell delimiters (Python-Markdown behavior)
365    pub fn split_table_row_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> Vec<String> {
366        let trimmed = row.trim();
367
368        if !trimmed.contains('|') {
369            return Vec::new();
370        }
371
372        // First, mask escaped pipes (same for all flavors)
373        let masked = Self::mask_pipes_for_table_parsing(trimmed);
374
375        // For MkDocs flavor, also mask pipes inside inline code
376        let final_masked = if flavor == crate::config::MarkdownFlavor::MkDocs {
377            Self::mask_pipes_in_inline_code(&masked)
378        } else {
379            masked
380        };
381
382        let has_leading = final_masked.starts_with('|');
383        let has_trailing = final_masked.ends_with('|');
384
385        let mut masked_content = final_masked.as_str();
386        let mut orig_content = trimmed;
387
388        if has_leading {
389            masked_content = &masked_content[1..];
390            orig_content = &orig_content[1..];
391        }
392
393        // Track whether we actually strip a trailing pipe
394        let stripped_trailing = has_trailing && !masked_content.is_empty();
395        if stripped_trailing {
396            masked_content = &masked_content[..masked_content.len() - 1];
397            orig_content = &orig_content[..orig_content.len() - 1];
398        }
399
400        // Handle edge cases for degenerate inputs
401        if masked_content.is_empty() {
402            if stripped_trailing {
403                // "||" case: two pipes with empty content between = one empty cell
404                return vec![String::new()];
405            } else {
406                // "|" case: single pipe, not a valid table row
407                return Vec::new();
408            }
409        }
410
411        let masked_parts: Vec<&str> = masked_content.split('|').collect();
412        let mut cells = Vec::new();
413        let mut pos = 0;
414
415        for masked_cell in masked_parts {
416            let cell_len = masked_cell.len();
417            let orig_cell = if pos + cell_len <= orig_content.len() {
418                &orig_content[pos..pos + cell_len]
419            } else {
420                masked_cell
421            };
422            cells.push(orig_cell.to_string());
423            pos += cell_len + 1; // +1 for the pipe delimiter
424        }
425
426        cells
427    }
428
429    /// Split a table row into individual cell contents using Standard/GFM behavior.
430    pub fn split_table_row(row: &str) -> Vec<String> {
431        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
432    }
433
434    /// Determine the pipe style of a table row
435    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
436        let trimmed = line.trim();
437        if !trimmed.contains('|') {
438            return None;
439        }
440
441        let has_leading = trimmed.starts_with('|');
442        let has_trailing = trimmed.ends_with('|');
443
444        match (has_leading, has_trailing) {
445            (true, true) => Some("leading_and_trailing"),
446            (true, false) => Some("leading_only"),
447            (false, true) => Some("trailing_only"),
448            (false, false) => Some("no_leading_or_trailing"),
449        }
450    }
451}
452
453#[cfg(test)]
454mod tests {
455    use super::*;
456    use crate::lint_context::LintContext;
457
458    #[test]
459    fn test_is_potential_table_row() {
460        // Basic valid table rows
461        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
462        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
463        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
464        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
465
466        // Multiple cells
467        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
468
469        // With whitespace
470        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
471        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
472
473        // Not table rows
474        assert!(!TableUtils::is_potential_table_row("- List item"));
475        assert!(!TableUtils::is_potential_table_row("* Another list"));
476        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
477        assert!(!TableUtils::is_potential_table_row("Regular text"));
478        assert!(!TableUtils::is_potential_table_row(""));
479        assert!(!TableUtils::is_potential_table_row("   "));
480
481        // Code blocks
482        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
483        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
484
485        // Single pipe not enough
486        assert!(!TableUtils::is_potential_table_row("Just one |"));
487        assert!(!TableUtils::is_potential_table_row("| Just one"));
488
489        // Very long cells are valid in tables (no length limit for cell content)
490        let long_cell = "a".repeat(150);
491        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
492
493        // Cells with newlines
494        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
495
496        // Empty cells (Issue #129)
497        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
498        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
499        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
500    }
501
502    #[test]
503    fn test_list_items_with_pipes_not_table_rows() {
504        // Ordered list items should NOT be detected as table rows
505        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
506        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
507        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
508        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
509        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
510
511        // Unordered list items with tabs
512        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
513        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
514        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
515
516        // Indented list items (the trim_start normalizes indentation)
517        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
518        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
519        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
520
521        // Task list items
522        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
523        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
524
525        // Multiple pipes in list items
526        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
527        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
528
529        // These SHOULD still be detected as potential table rows
530        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
531        assert!(TableUtils::is_potential_table_row("cell | cell"));
532        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
533    }
534
535    #[test]
536    fn test_is_delimiter_row() {
537        // Basic delimiter rows
538        assert!(TableUtils::is_delimiter_row("|---|---|"));
539        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
540        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
541        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
542
543        // With varying dash counts
544        assert!(TableUtils::is_delimiter_row("|-|--|"));
545        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
546
547        // With whitespace
548        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
549        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
550
551        // Multiple columns
552        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
553
554        // Without leading/trailing pipes
555        assert!(TableUtils::is_delimiter_row("--- | ---"));
556        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
557
558        // Not delimiter rows
559        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
560        assert!(!TableUtils::is_delimiter_row("Regular text"));
561        assert!(!TableUtils::is_delimiter_row(""));
562        assert!(!TableUtils::is_delimiter_row("|||"));
563        assert!(!TableUtils::is_delimiter_row("| | |"));
564
565        // Must have dashes
566        assert!(!TableUtils::is_delimiter_row("| : | : |"));
567        assert!(!TableUtils::is_delimiter_row("|    |    |"));
568
569        // Mixed content
570        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
571        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
572    }
573
574    #[test]
575    fn test_count_cells() {
576        // Basic counts
577        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
578        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
579        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
580        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
581
582        // Single cell
583        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
584        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
585
586        // Empty cells
587        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
588        assert_eq!(TableUtils::count_cells("| | | |"), 3);
589
590        // Many cells
591        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
592
593        // Edge cases
594        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
595        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
596
597        // No table
598        assert_eq!(TableUtils::count_cells("Regular text"), 0);
599        assert_eq!(TableUtils::count_cells(""), 0);
600        assert_eq!(TableUtils::count_cells("   "), 0);
601
602        // Whitespace handling
603        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
604        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
605    }
606
607    #[test]
608    fn test_count_cells_with_escaped_pipes() {
609        // In GFM tables, escape handling happens BEFORE cell splitting.
610        // Inline code does NOT protect pipes - they still act as cell delimiters.
611        // To include a literal pipe in a table cell, you MUST escape it with \|
612
613        // Basic table structure
614        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
615        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
616        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
617
618        // Escaped pipes: \| keeps the pipe as content
619        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
620        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
621
622        // Escaped pipes inside backticks (correct way to include | in code in tables)
623        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
624
625        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
626        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
627        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 3); // Same inside code
628
629        // IMPORTANT: Bare pipes in inline code DO act as delimiters (GFM behavior)
630        // This matches GitHub's actual rendering where `a | b` splits into two cells
631        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 3);
632        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 4);
633        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 2);
634
635        // The regex example from Issue #34 - pipes in regex patterns need escaping
636        // Unescaped: `^([0-1]?\d|2[0-3])` has a bare | which splits cells
637        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 3);
638        // Escaped: `^([0-1]?\d\|2[0-3])` keeps the | as part of the regex
639        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
640    }
641
642    #[test]
643    fn test_determine_pipe_style() {
644        // All pipe styles
645        assert_eq!(
646            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
647            Some("leading_and_trailing")
648        );
649        assert_eq!(
650            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
651            Some("leading_only")
652        );
653        assert_eq!(
654            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
655            Some("trailing_only")
656        );
657        assert_eq!(
658            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
659            Some("no_leading_or_trailing")
660        );
661
662        // With whitespace
663        assert_eq!(
664            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
665            Some("leading_and_trailing")
666        );
667        assert_eq!(
668            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
669            Some("leading_only")
670        );
671
672        // No pipes
673        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
674        assert_eq!(TableUtils::determine_pipe_style(""), None);
675        assert_eq!(TableUtils::determine_pipe_style("   "), None);
676
677        // Single pipe cases
678        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
679        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
680        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
681    }
682
683    #[test]
684    fn test_find_table_blocks_simple() {
685        let content = "| Header 1 | Header 2 |
686|-----------|-----------|
687| Cell 1    | Cell 2    |
688| Cell 3    | Cell 4    |";
689
690        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
691
692        let tables = TableUtils::find_table_blocks(content, &ctx);
693        assert_eq!(tables.len(), 1);
694
695        let table = &tables[0];
696        assert_eq!(table.start_line, 0);
697        assert_eq!(table.end_line, 3);
698        assert_eq!(table.header_line, 0);
699        assert_eq!(table.delimiter_line, 1);
700        assert_eq!(table.content_lines, vec![2, 3]);
701    }
702
703    #[test]
704    fn test_find_table_blocks_multiple() {
705        let content = "Some text
706
707| Table 1 | Col A |
708|----------|-------|
709| Data 1   | Val 1 |
710
711More text
712
713| Table 2 | Col 2 |
714|----------|-------|
715| Data 2   | Data  |";
716
717        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
718
719        let tables = TableUtils::find_table_blocks(content, &ctx);
720        assert_eq!(tables.len(), 2);
721
722        // First table
723        assert_eq!(tables[0].start_line, 2);
724        assert_eq!(tables[0].end_line, 4);
725        assert_eq!(tables[0].header_line, 2);
726        assert_eq!(tables[0].delimiter_line, 3);
727        assert_eq!(tables[0].content_lines, vec![4]);
728
729        // Second table
730        assert_eq!(tables[1].start_line, 8);
731        assert_eq!(tables[1].end_line, 10);
732        assert_eq!(tables[1].header_line, 8);
733        assert_eq!(tables[1].delimiter_line, 9);
734        assert_eq!(tables[1].content_lines, vec![10]);
735    }
736
737    #[test]
738    fn test_find_table_blocks_no_content_rows() {
739        let content = "| Header 1 | Header 2 |
740|-----------|-----------|
741
742Next paragraph";
743
744        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
745
746        let tables = TableUtils::find_table_blocks(content, &ctx);
747        assert_eq!(tables.len(), 1);
748
749        let table = &tables[0];
750        assert_eq!(table.start_line, 0);
751        assert_eq!(table.end_line, 1); // Just header and delimiter
752        assert_eq!(table.content_lines.len(), 0);
753    }
754
755    #[test]
756    fn test_find_table_blocks_in_code_block() {
757        let content = "```
758| Not | A | Table |
759|-----|---|-------|
760| In  | Code | Block |
761```
762
763| Real | Table |
764|------|-------|
765| Data | Here  |";
766
767        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
768
769        let tables = TableUtils::find_table_blocks(content, &ctx);
770        assert_eq!(tables.len(), 1); // Only the table outside code block
771
772        let table = &tables[0];
773        assert_eq!(table.header_line, 6);
774        assert_eq!(table.delimiter_line, 7);
775    }
776
777    #[test]
778    fn test_find_table_blocks_no_tables() {
779        let content = "Just regular text
780No tables here
781- List item with | pipe
782* Another list item";
783
784        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
785
786        let tables = TableUtils::find_table_blocks(content, &ctx);
787        assert_eq!(tables.len(), 0);
788    }
789
790    #[test]
791    fn test_find_table_blocks_malformed() {
792        let content = "| Header without delimiter |
793| This looks like table |
794But no delimiter row
795
796| Proper | Table |
797|---------|-------|
798| Data    | Here  |";
799
800        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
801
802        let tables = TableUtils::find_table_blocks(content, &ctx);
803        assert_eq!(tables.len(), 1); // Only the proper table
804        assert_eq!(tables[0].header_line, 4);
805    }
806
807    #[test]
808    fn test_edge_cases() {
809        // Test empty content
810        assert!(!TableUtils::is_potential_table_row(""));
811        assert!(!TableUtils::is_delimiter_row(""));
812        assert_eq!(TableUtils::count_cells(""), 0);
813        assert_eq!(TableUtils::determine_pipe_style(""), None);
814
815        // Test whitespace only
816        assert!(!TableUtils::is_potential_table_row("   "));
817        assert!(!TableUtils::is_delimiter_row("   "));
818        assert_eq!(TableUtils::count_cells("   "), 0);
819        assert_eq!(TableUtils::determine_pipe_style("   "), None);
820
821        // Test single character
822        assert!(!TableUtils::is_potential_table_row("|"));
823        assert!(!TableUtils::is_delimiter_row("|"));
824        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
825
826        // Test very long lines are valid table rows (no length limit)
827        // Test both single-column and multi-column long lines
828        let long_single = format!("| {} |", "a".repeat(200));
829        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
830
831        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
832        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
833
834        // Test unicode
835        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
836        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
837        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
838    }
839
840    #[test]
841    fn test_table_block_struct() {
842        let block = TableBlock {
843            start_line: 0,
844            end_line: 5,
845            header_line: 0,
846            delimiter_line: 1,
847            content_lines: vec![2, 3, 4, 5],
848        };
849
850        // Test Debug trait
851        let debug_str = format!("{block:?}");
852        assert!(debug_str.contains("TableBlock"));
853        assert!(debug_str.contains("start_line: 0"));
854
855        // Test Clone trait
856        let cloned = block.clone();
857        assert_eq!(cloned.start_line, block.start_line);
858        assert_eq!(cloned.end_line, block.end_line);
859        assert_eq!(cloned.header_line, block.header_line);
860        assert_eq!(cloned.delimiter_line, block.delimiter_line);
861        assert_eq!(cloned.content_lines, block.content_lines);
862    }
863
864    #[test]
865    fn test_split_table_row() {
866        // Basic split
867        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
868        assert_eq!(cells.len(), 3);
869        assert_eq!(cells[0].trim(), "Cell 1");
870        assert_eq!(cells[1].trim(), "Cell 2");
871        assert_eq!(cells[2].trim(), "Cell 3");
872
873        // Without trailing pipe
874        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
875        assert_eq!(cells.len(), 2);
876
877        // Empty cells
878        let cells = TableUtils::split_table_row("| | | |");
879        assert_eq!(cells.len(), 3);
880
881        // Single cell
882        let cells = TableUtils::split_table_row("| Cell |");
883        assert_eq!(cells.len(), 1);
884        assert_eq!(cells[0].trim(), "Cell");
885
886        // No pipes
887        let cells = TableUtils::split_table_row("No pipes here");
888        assert_eq!(cells.len(), 0);
889    }
890
891    #[test]
892    fn test_split_table_row_with_escaped_pipes() {
893        // Escaped pipes should be preserved in cell content
894        let cells = TableUtils::split_table_row(r"| A | B \| C |");
895        assert_eq!(cells.len(), 2);
896        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
897
898        // Double backslash + pipe is NOT escaped
899        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
900        assert_eq!(cells.len(), 3);
901    }
902
903    #[test]
904    fn test_split_table_row_with_flavor_mkdocs() {
905        // MkDocs flavor: pipes in inline code are NOT cell delimiters
906        let cells =
907            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
908        assert_eq!(cells.len(), 2);
909        assert!(
910            cells[1].contains("`x | y`"),
911            "Inline code with pipe should be single cell in MkDocs flavor"
912        );
913
914        // Multiple pipes in inline code
915        let cells =
916            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
917        assert_eq!(cells.len(), 2);
918        assert!(cells[1].contains("`a | b | c`"));
919    }
920
921    #[test]
922    fn test_split_table_row_with_flavor_standard() {
923        // Standard/GFM flavor: pipes in inline code ARE cell delimiters
924        let cells =
925            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
926        // In GFM, `x | y` splits into separate cells
927        assert_eq!(cells.len(), 3);
928    }
929}