rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).

/// A single table located in a markdown document.
///
/// Every line field is a 0-based index into the document's lines (the same
/// indexing as `content.lines()`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableBlock {
    /// Index of the first line of the table (the header row).
    pub start_line: usize,
    /// Index of the last line that belongs to the table (inclusive).
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row directly below the header.
    pub delimiter_line: usize,
    /// Indices of the body rows following the delimiter row, in document order.
    pub content_lines: Vec<usize>,
}
14
/// Namespace type whose associated functions implement the shared table
/// detection and row-parsing helpers.
#[derive(Debug, Clone, Copy, Default)]
pub struct TableUtils;
17
18impl TableUtils {
19    /// Check if a line looks like a potential table row
20    pub fn is_potential_table_row(line: &str) -> bool {
21        let trimmed = line.trim();
22        if trimmed.is_empty() || !trimmed.contains('|') {
23            return false;
24        }
25
26        // Skip lines that are clearly not table rows
27        // Unordered list items with space or tab after marker
28        if trimmed.starts_with("- ")
29            || trimmed.starts_with("* ")
30            || trimmed.starts_with("+ ")
31            || trimmed.starts_with("-\t")
32            || trimmed.starts_with("*\t")
33            || trimmed.starts_with("+\t")
34        {
35            return false;
36        }
37
38        // Skip ordered list items: digits followed by . or ) then space/tab
39        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
40            && first_non_digit > 0
41        {
42            let after_digits = &trimmed[first_non_digit..];
43            if after_digits.starts_with(". ")
44                || after_digits.starts_with(".\t")
45                || after_digits.starts_with(") ")
46                || after_digits.starts_with(")\t")
47            {
48                return false;
49            }
50        }
51
52        // Skip lines that are clearly code or inline code
53        if trimmed.starts_with("`") || trimmed.contains("``") {
54            return false;
55        }
56
57        // Must have at least 2 parts when split by |
58        let parts: Vec<&str> = trimmed.split('|').collect();
59        if parts.len() < 2 {
60            return false;
61        }
62
63        // Check if it looks like a table row by having reasonable content between pipes
64        let mut valid_parts = 0;
65        let mut total_non_empty_parts = 0;
66
67        for part in &parts {
68            let part_trimmed = part.trim();
69            // Skip empty parts (from leading/trailing pipes)
70            if part_trimmed.is_empty() {
71                continue;
72            }
73            total_non_empty_parts += 1;
74
75            // Count parts that look like table cells (reasonable content, no newlines)
76            if !part_trimmed.contains('\n') {
77                valid_parts += 1;
78            }
79        }
80
81        // Check if all non-empty parts are valid (no newlines)
82        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
83            // Some cells contain newlines, not a valid table row
84            return false;
85        }
86
87        // GFM allows tables with all empty cells (e.g., |||)
88        // These are valid if they have proper table formatting (leading and trailing pipes)
89        if total_non_empty_parts == 0 {
90            // Empty cells are only valid with proper pipe formatting
91            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
92        }
93
94        // GFM allows single-column tables, so >= 1 valid part is enough
95        // when the line has proper table formatting (pipes)
96        if trimmed.starts_with('|') && trimmed.ends_with('|') {
97            // Properly formatted table row with pipes on both ends
98            valid_parts >= 1
99        } else {
100            // For rows without proper pipe formatting, require at least 2 cells
101            valid_parts >= 2
102        }
103    }
104
105    /// Check if a line is a table delimiter row (e.g., |---|---|)
106    pub fn is_delimiter_row(line: &str) -> bool {
107        let trimmed = line.trim();
108        if !trimmed.contains('|') || !trimmed.contains('-') {
109            return false;
110        }
111
112        // Split by pipes and check each part
113        let parts: Vec<&str> = trimmed.split('|').collect();
114        let mut valid_delimiter_parts = 0;
115        let mut total_non_empty_parts = 0;
116
117        for part in &parts {
118            let part_trimmed = part.trim();
119            if part_trimmed.is_empty() {
120                continue; // Skip empty parts from leading/trailing pipes
121            }
122
123            total_non_empty_parts += 1;
124
125            // Check if this part looks like a delimiter (contains dashes and optionally colons)
126            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
127                valid_delimiter_parts += 1;
128            }
129        }
130
131        // All non-empty parts must be valid delimiters, and there must be at least one
132        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
133    }
134
135    /// Strip blockquote prefix from a line, returning the content without the prefix
136    fn strip_blockquote_prefix(line: &str) -> &str {
137        let trimmed = line.trim_start();
138        if trimmed.starts_with('>') {
139            // Strip all blockquote markers and following space
140            let mut rest = trimmed;
141            while rest.starts_with('>') {
142                rest = rest.strip_prefix('>').unwrap_or(rest);
143                rest = rest.trim_start_matches(' ');
144            }
145            rest
146        } else {
147            line
148        }
149    }
150
151    /// Find all table blocks in the content with optimized detection
152    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
153    pub fn find_table_blocks_with_code_info(
154        content: &str,
155        code_blocks: &[(usize, usize)],
156        code_spans: &[crate::lint_context::CodeSpan],
157        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
158    ) -> Vec<TableBlock> {
159        let lines: Vec<&str> = content.lines().collect();
160        let mut tables = Vec::new();
161        let mut i = 0;
162
163        // Pre-compute line positions for efficient code block checking
164        let mut line_positions = Vec::with_capacity(lines.len());
165        let mut pos = 0;
166        for line in &lines {
167            line_positions.push(pos);
168            pos += line.len() + 1; // +1 for newline
169        }
170
171        while i < lines.len() {
172            // Skip lines in code blocks, code spans, or HTML comments
173            let line_start = line_positions[i];
174            let in_code =
175                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
176                    || code_spans
177                        .iter()
178                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
179            let in_html_comment = html_comment_ranges
180                .iter()
181                .any(|range| line_start >= range.start && line_start < range.end);
182
183            if in_code || in_html_comment {
184                i += 1;
185                continue;
186            }
187
188            // Strip blockquote prefix for table detection
189            let line_content = Self::strip_blockquote_prefix(lines[i]);
190
191            // Look for potential table start
192            if Self::is_potential_table_row(line_content) {
193                // Check if the next line is a delimiter row (also strip blockquote prefix)
194                let next_line_content = if i + 1 < lines.len() {
195                    Self::strip_blockquote_prefix(lines[i + 1])
196                } else {
197                    ""
198                };
199                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
200                    // Found a table! Find its end
201                    let table_start = i;
202                    let header_line = i;
203                    let delimiter_line = i + 1;
204                    let mut table_end = i + 1; // Include the delimiter row
205                    let mut content_lines = Vec::new();
206
207                    // Continue while we have table rows
208                    let mut j = i + 2;
209                    while j < lines.len() {
210                        let line = lines[j];
211                        // Strip blockquote prefix for checking
212                        let line_content = Self::strip_blockquote_prefix(line);
213                        if line_content.trim().is_empty() {
214                            // Empty line ends the table (including blockquote blank lines like ">")
215                            break;
216                        }
217                        if Self::is_potential_table_row(line_content) {
218                            content_lines.push(j);
219                            table_end = j;
220                            j += 1;
221                        } else {
222                            // Non-table line ends the table
223                            break;
224                        }
225                    }
226
227                    tables.push(TableBlock {
228                        start_line: table_start,
229                        end_line: table_end,
230                        header_line,
231                        delimiter_line,
232                        content_lines,
233                    });
234                    i = table_end + 1;
235                } else {
236                    i += 1;
237                }
238            } else {
239                i += 1;
240            }
241        }
242
243        tables
244    }
245
246    /// Find all table blocks in the content with optimized detection
247    /// This is a backward-compatible wrapper that accepts LintContext
248    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
249        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
250    }
251
252    /// Count the number of cells in a table row
253    pub fn count_cells(row: &str) -> usize {
254        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
255    }
256
257    /// Count the number of cells in a table row with flavor-specific behavior
258    ///
259    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
260    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
261    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
262        Self::split_table_row_with_flavor(row, flavor).len()
263    }
264
265    /// Mask pipes inside inline code blocks with a placeholder character
266    pub fn mask_pipes_in_inline_code(text: &str) -> String {
267        let mut result = String::new();
268        let chars: Vec<char> = text.chars().collect();
269        let mut i = 0;
270
271        while i < chars.len() {
272            if chars[i] == '`' {
273                // Count consecutive backticks at start
274                let start = i;
275                let mut backtick_count = 0;
276                while i < chars.len() && chars[i] == '`' {
277                    backtick_count += 1;
278                    i += 1;
279                }
280
281                // Look for matching closing backticks
282                let mut found_closing = false;
283                let mut j = i;
284
285                while j < chars.len() {
286                    if chars[j] == '`' {
287                        // Count potential closing backticks
288                        let close_start = j;
289                        let mut close_count = 0;
290                        while j < chars.len() && chars[j] == '`' {
291                            close_count += 1;
292                            j += 1;
293                        }
294
295                        if close_count == backtick_count {
296                            // Found matching closing backticks
297                            found_closing = true;
298
299                            // Valid inline code - add with pipes masked
300                            result.extend(chars[start..i].iter());
301
302                            for &ch in chars.iter().take(close_start).skip(i) {
303                                if ch == '|' {
304                                    result.push('_'); // Mask pipe with underscore
305                                } else {
306                                    result.push(ch);
307                                }
308                            }
309
310                            result.extend(chars[close_start..j].iter());
311                            i = j;
312                            break;
313                        }
314                        // If not matching, continue searching (j is already past these backticks)
315                    } else {
316                        j += 1;
317                    }
318                }
319
320                if !found_closing {
321                    // No matching closing found, treat as regular text
322                    result.extend(chars[start..i].iter());
323                }
324            } else {
325                result.push(chars[i]);
326                i += 1;
327            }
328        }
329
330        result
331    }
332
333    /// Escape pipes inside inline code blocks with backslash.
334    /// Converts `|` to `\|` inside backtick spans.
335    /// Used by auto-fix to preserve content while making tables valid.
336    pub fn escape_pipes_in_inline_code(text: &str) -> String {
337        let mut result = String::new();
338        let chars: Vec<char> = text.chars().collect();
339        let mut i = 0;
340
341        while i < chars.len() {
342            if chars[i] == '`' {
343                let start = i;
344                let mut backtick_count = 0;
345                while i < chars.len() && chars[i] == '`' {
346                    backtick_count += 1;
347                    i += 1;
348                }
349
350                let mut found_closing = false;
351                let mut j = i;
352
353                while j < chars.len() {
354                    if chars[j] == '`' {
355                        let close_start = j;
356                        let mut close_count = 0;
357                        while j < chars.len() && chars[j] == '`' {
358                            close_count += 1;
359                            j += 1;
360                        }
361
362                        if close_count == backtick_count {
363                            found_closing = true;
364                            result.extend(chars[start..i].iter());
365
366                            for &ch in chars.iter().take(close_start).skip(i) {
367                                if ch == '|' {
368                                    result.push('\\');
369                                    result.push('|');
370                                } else {
371                                    result.push(ch);
372                                }
373                            }
374
375                            result.extend(chars[close_start..j].iter());
376                            i = j;
377                            break;
378                        }
379                    } else {
380                        j += 1;
381                    }
382                }
383
384                if !found_closing {
385                    result.extend(chars[start..i].iter());
386                }
387            } else {
388                result.push(chars[i]);
389                i += 1;
390            }
391        }
392
393        result
394    }
395
396    /// Mask escaped pipes for accurate table cell parsing
397    ///
398    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
399    /// - `\|` → escaped pipe → masked (stays as cell content)
400    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
401    ///
402    /// IMPORTANT: Inline code spans do NOT protect pipes in GFM tables!
403    /// The pipe in `` `a | b` `` still acts as a cell delimiter, splitting into
404    /// two cells: `` `a `` and ` b` ``. This matches GitHub's actual rendering.
405    ///
406    /// To include a literal pipe in a table cell (even in code), you must escape it:
407    /// `` `a \| b` `` → single cell containing `a | b` (with code formatting)
408    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
409        let mut result = String::new();
410        let chars: Vec<char> = text.chars().collect();
411        let mut i = 0;
412
413        while i < chars.len() {
414            if chars[i] == '\\' {
415                if i + 1 < chars.len() && chars[i + 1] == '\\' {
416                    // Escaped backslash: \\ → push both and continue
417                    // The next character (if it's a pipe) will be a real delimiter
418                    result.push('\\');
419                    result.push('\\');
420                    i += 2;
421                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
422                    // Escaped pipe: \| → mask the pipe
423                    result.push('\\');
424                    result.push('_'); // Mask the pipe
425                    i += 2;
426                } else {
427                    // Single backslash not followed by \ or | → just push it
428                    result.push(chars[i]);
429                    i += 1;
430                }
431            } else {
432                result.push(chars[i]);
433                i += 1;
434            }
435        }
436
437        result
438    }
439
440    /// Split a table row into individual cell contents with flavor-specific behavior.
441    ///
442    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
443    /// This is the foundation for both cell counting and cell content extraction.
444    ///
445    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
446    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
447    pub fn split_table_row_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> Vec<String> {
448        let trimmed = row.trim();
449
450        if !trimmed.contains('|') {
451            return Vec::new();
452        }
453
454        // First, mask escaped pipes (same for all flavors)
455        let masked = Self::mask_pipes_for_table_parsing(trimmed);
456
457        // For MkDocs flavor, also mask pipes inside inline code
458        let final_masked = if flavor == crate::config::MarkdownFlavor::MkDocs {
459            Self::mask_pipes_in_inline_code(&masked)
460        } else {
461            masked
462        };
463
464        let has_leading = final_masked.starts_with('|');
465        let has_trailing = final_masked.ends_with('|');
466
467        let mut masked_content = final_masked.as_str();
468        let mut orig_content = trimmed;
469
470        if has_leading {
471            masked_content = &masked_content[1..];
472            orig_content = &orig_content[1..];
473        }
474
475        // Track whether we actually strip a trailing pipe
476        let stripped_trailing = has_trailing && !masked_content.is_empty();
477        if stripped_trailing {
478            masked_content = &masked_content[..masked_content.len() - 1];
479            orig_content = &orig_content[..orig_content.len() - 1];
480        }
481
482        // Handle edge cases for degenerate inputs
483        if masked_content.is_empty() {
484            if stripped_trailing {
485                // "||" case: two pipes with empty content between = one empty cell
486                return vec![String::new()];
487            } else {
488                // "|" case: single pipe, not a valid table row
489                return Vec::new();
490            }
491        }
492
493        let masked_parts: Vec<&str> = masked_content.split('|').collect();
494        let mut cells = Vec::new();
495        let mut pos = 0;
496
497        for masked_cell in masked_parts {
498            let cell_len = masked_cell.len();
499            let orig_cell = if pos + cell_len <= orig_content.len() {
500                &orig_content[pos..pos + cell_len]
501            } else {
502                masked_cell
503            };
504            cells.push(orig_cell.to_string());
505            pos += cell_len + 1; // +1 for the pipe delimiter
506        }
507
508        cells
509    }
510
511    /// Split a table row into individual cell contents using Standard/GFM behavior.
512    pub fn split_table_row(row: &str) -> Vec<String> {
513        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
514    }
515
516    /// Determine the pipe style of a table row
517    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
518        let trimmed = line.trim();
519        if !trimmed.contains('|') {
520            return None;
521        }
522
523        let has_leading = trimmed.starts_with('|');
524        let has_trailing = trimmed.ends_with('|');
525
526        match (has_leading, has_trailing) {
527            (true, true) => Some("leading_and_trailing"),
528            (true, false) => Some("leading_only"),
529            (false, true) => Some("trailing_only"),
530            (false, false) => Some("no_leading_or_trailing"),
531        }
532    }
533}
534
535#[cfg(test)]
536mod tests {
537    use super::*;
538    use crate::lint_context::LintContext;
539
540    #[test]
541    fn test_is_potential_table_row() {
542        // Basic valid table rows
543        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
544        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
545        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
546        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
547
548        // Multiple cells
549        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
550
551        // With whitespace
552        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
553        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
554
555        // Not table rows
556        assert!(!TableUtils::is_potential_table_row("- List item"));
557        assert!(!TableUtils::is_potential_table_row("* Another list"));
558        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
559        assert!(!TableUtils::is_potential_table_row("Regular text"));
560        assert!(!TableUtils::is_potential_table_row(""));
561        assert!(!TableUtils::is_potential_table_row("   "));
562
563        // Code blocks
564        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
565        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
566
567        // Single pipe not enough
568        assert!(!TableUtils::is_potential_table_row("Just one |"));
569        assert!(!TableUtils::is_potential_table_row("| Just one"));
570
571        // Very long cells are valid in tables (no length limit for cell content)
572        let long_cell = "a".repeat(150);
573        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
574
575        // Cells with newlines
576        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
577
578        // Empty cells (Issue #129)
579        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
580        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
581        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
582    }
583
584    #[test]
585    fn test_list_items_with_pipes_not_table_rows() {
586        // Ordered list items should NOT be detected as table rows
587        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
588        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
589        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
590        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
591        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
592
593        // Unordered list items with tabs
594        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
595        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
596        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
597
598        // Indented list items (the trim_start normalizes indentation)
599        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
600        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
601        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
602
603        // Task list items
604        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
605        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
606
607        // Multiple pipes in list items
608        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
609        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
610
611        // These SHOULD still be detected as potential table rows
612        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
613        assert!(TableUtils::is_potential_table_row("cell | cell"));
614        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
615    }
616
617    #[test]
618    fn test_is_delimiter_row() {
619        // Basic delimiter rows
620        assert!(TableUtils::is_delimiter_row("|---|---|"));
621        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
622        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
623        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
624
625        // With varying dash counts
626        assert!(TableUtils::is_delimiter_row("|-|--|"));
627        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
628
629        // With whitespace
630        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
631        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
632
633        // Multiple columns
634        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
635
636        // Without leading/trailing pipes
637        assert!(TableUtils::is_delimiter_row("--- | ---"));
638        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
639
640        // Not delimiter rows
641        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
642        assert!(!TableUtils::is_delimiter_row("Regular text"));
643        assert!(!TableUtils::is_delimiter_row(""));
644        assert!(!TableUtils::is_delimiter_row("|||"));
645        assert!(!TableUtils::is_delimiter_row("| | |"));
646
647        // Must have dashes
648        assert!(!TableUtils::is_delimiter_row("| : | : |"));
649        assert!(!TableUtils::is_delimiter_row("|    |    |"));
650
651        // Mixed content
652        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
653        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
654    }
655
656    #[test]
657    fn test_count_cells() {
658        // Basic counts
659        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
660        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
661        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
662        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
663
664        // Single cell
665        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
666        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
667
668        // Empty cells
669        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
670        assert_eq!(TableUtils::count_cells("| | | |"), 3);
671
672        // Many cells
673        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
674
675        // Edge cases
676        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
677        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
678
679        // No table
680        assert_eq!(TableUtils::count_cells("Regular text"), 0);
681        assert_eq!(TableUtils::count_cells(""), 0);
682        assert_eq!(TableUtils::count_cells("   "), 0);
683
684        // Whitespace handling
685        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
686        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
687    }
688
689    #[test]
690    fn test_count_cells_with_escaped_pipes() {
691        // In GFM tables, escape handling happens BEFORE cell splitting.
692        // Inline code does NOT protect pipes - they still act as cell delimiters.
693        // To include a literal pipe in a table cell, you MUST escape it with \|
694
695        // Basic table structure
696        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
697        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
698        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
699
700        // Escaped pipes: \| keeps the pipe as content
701        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
702        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
703
704        // Escaped pipes inside backticks (correct way to include | in code in tables)
705        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
706
707        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
708        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
709        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 3); // Same inside code
710
711        // IMPORTANT: Bare pipes in inline code DO act as delimiters (GFM behavior)
712        // This matches GitHub's actual rendering where `a | b` splits into two cells
713        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 3);
714        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 4);
715        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 2);
716
717        // The regex example from Issue #34 - pipes in regex patterns need escaping
718        // Unescaped: `^([0-1]?\d|2[0-3])` has a bare | which splits cells
719        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 3);
720        // Escaped: `^([0-1]?\d\|2[0-3])` keeps the | as part of the regex
721        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
722    }
723
724    #[test]
725    fn test_determine_pipe_style() {
726        // All pipe styles
727        assert_eq!(
728            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
729            Some("leading_and_trailing")
730        );
731        assert_eq!(
732            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
733            Some("leading_only")
734        );
735        assert_eq!(
736            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
737            Some("trailing_only")
738        );
739        assert_eq!(
740            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
741            Some("no_leading_or_trailing")
742        );
743
744        // With whitespace
745        assert_eq!(
746            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
747            Some("leading_and_trailing")
748        );
749        assert_eq!(
750            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
751            Some("leading_only")
752        );
753
754        // No pipes
755        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
756        assert_eq!(TableUtils::determine_pipe_style(""), None);
757        assert_eq!(TableUtils::determine_pipe_style("   "), None);
758
759        // Single pipe cases
760        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
761        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
762        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
763    }
764
765    #[test]
766    fn test_find_table_blocks_simple() {
767        let content = "| Header 1 | Header 2 |
768|-----------|-----------|
769| Cell 1    | Cell 2    |
770| Cell 3    | Cell 4    |";
771
772        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
773
774        let tables = TableUtils::find_table_blocks(content, &ctx);
775        assert_eq!(tables.len(), 1);
776
777        let table = &tables[0];
778        assert_eq!(table.start_line, 0);
779        assert_eq!(table.end_line, 3);
780        assert_eq!(table.header_line, 0);
781        assert_eq!(table.delimiter_line, 1);
782        assert_eq!(table.content_lines, vec![2, 3]);
783    }
784
785    #[test]
786    fn test_find_table_blocks_multiple() {
787        let content = "Some text
788
789| Table 1 | Col A |
790|----------|-------|
791| Data 1   | Val 1 |
792
793More text
794
795| Table 2 | Col 2 |
796|----------|-------|
797| Data 2   | Data  |";
798
799        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
800
801        let tables = TableUtils::find_table_blocks(content, &ctx);
802        assert_eq!(tables.len(), 2);
803
804        // First table
805        assert_eq!(tables[0].start_line, 2);
806        assert_eq!(tables[0].end_line, 4);
807        assert_eq!(tables[0].header_line, 2);
808        assert_eq!(tables[0].delimiter_line, 3);
809        assert_eq!(tables[0].content_lines, vec![4]);
810
811        // Second table
812        assert_eq!(tables[1].start_line, 8);
813        assert_eq!(tables[1].end_line, 10);
814        assert_eq!(tables[1].header_line, 8);
815        assert_eq!(tables[1].delimiter_line, 9);
816        assert_eq!(tables[1].content_lines, vec![10]);
817    }
818
819    #[test]
820    fn test_find_table_blocks_no_content_rows() {
821        let content = "| Header 1 | Header 2 |
822|-----------|-----------|
823
824Next paragraph";
825
826        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
827
828        let tables = TableUtils::find_table_blocks(content, &ctx);
829        assert_eq!(tables.len(), 1);
830
831        let table = &tables[0];
832        assert_eq!(table.start_line, 0);
833        assert_eq!(table.end_line, 1); // Just header and delimiter
834        assert_eq!(table.content_lines.len(), 0);
835    }
836
837    #[test]
838    fn test_find_table_blocks_in_code_block() {
839        let content = "```
840| Not | A | Table |
841|-----|---|-------|
842| In  | Code | Block |
843```
844
845| Real | Table |
846|------|-------|
847| Data | Here  |";
848
849        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
850
851        let tables = TableUtils::find_table_blocks(content, &ctx);
852        assert_eq!(tables.len(), 1); // Only the table outside code block
853
854        let table = &tables[0];
855        assert_eq!(table.header_line, 6);
856        assert_eq!(table.delimiter_line, 7);
857    }
858
859    #[test]
860    fn test_find_table_blocks_no_tables() {
861        let content = "Just regular text
862No tables here
863- List item with | pipe
864* Another list item";
865
866        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
867
868        let tables = TableUtils::find_table_blocks(content, &ctx);
869        assert_eq!(tables.len(), 0);
870    }
871
872    #[test]
873    fn test_find_table_blocks_malformed() {
874        let content = "| Header without delimiter |
875| This looks like table |
876But no delimiter row
877
878| Proper | Table |
879|---------|-------|
880| Data    | Here  |";
881
882        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
883
884        let tables = TableUtils::find_table_blocks(content, &ctx);
885        assert_eq!(tables.len(), 1); // Only the proper table
886        assert_eq!(tables[0].header_line, 4);
887    }
888
889    #[test]
890    fn test_edge_cases() {
891        // Test empty content
892        assert!(!TableUtils::is_potential_table_row(""));
893        assert!(!TableUtils::is_delimiter_row(""));
894        assert_eq!(TableUtils::count_cells(""), 0);
895        assert_eq!(TableUtils::determine_pipe_style(""), None);
896
897        // Test whitespace only
898        assert!(!TableUtils::is_potential_table_row("   "));
899        assert!(!TableUtils::is_delimiter_row("   "));
900        assert_eq!(TableUtils::count_cells("   "), 0);
901        assert_eq!(TableUtils::determine_pipe_style("   "), None);
902
903        // Test single character
904        assert!(!TableUtils::is_potential_table_row("|"));
905        assert!(!TableUtils::is_delimiter_row("|"));
906        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
907
908        // Test very long lines are valid table rows (no length limit)
909        // Test both single-column and multi-column long lines
910        let long_single = format!("| {} |", "a".repeat(200));
911        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
912
913        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
914        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
915
916        // Test unicode
917        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
918        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
919        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
920    }
921
922    #[test]
923    fn test_table_block_struct() {
924        let block = TableBlock {
925            start_line: 0,
926            end_line: 5,
927            header_line: 0,
928            delimiter_line: 1,
929            content_lines: vec![2, 3, 4, 5],
930        };
931
932        // Test Debug trait
933        let debug_str = format!("{block:?}");
934        assert!(debug_str.contains("TableBlock"));
935        assert!(debug_str.contains("start_line: 0"));
936
937        // Test Clone trait
938        let cloned = block.clone();
939        assert_eq!(cloned.start_line, block.start_line);
940        assert_eq!(cloned.end_line, block.end_line);
941        assert_eq!(cloned.header_line, block.header_line);
942        assert_eq!(cloned.delimiter_line, block.delimiter_line);
943        assert_eq!(cloned.content_lines, block.content_lines);
944    }
945
946    #[test]
947    fn test_split_table_row() {
948        // Basic split
949        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
950        assert_eq!(cells.len(), 3);
951        assert_eq!(cells[0].trim(), "Cell 1");
952        assert_eq!(cells[1].trim(), "Cell 2");
953        assert_eq!(cells[2].trim(), "Cell 3");
954
955        // Without trailing pipe
956        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
957        assert_eq!(cells.len(), 2);
958
959        // Empty cells
960        let cells = TableUtils::split_table_row("| | | |");
961        assert_eq!(cells.len(), 3);
962
963        // Single cell
964        let cells = TableUtils::split_table_row("| Cell |");
965        assert_eq!(cells.len(), 1);
966        assert_eq!(cells[0].trim(), "Cell");
967
968        // No pipes
969        let cells = TableUtils::split_table_row("No pipes here");
970        assert_eq!(cells.len(), 0);
971    }
972
973    #[test]
974    fn test_split_table_row_with_escaped_pipes() {
975        // Escaped pipes should be preserved in cell content
976        let cells = TableUtils::split_table_row(r"| A | B \| C |");
977        assert_eq!(cells.len(), 2);
978        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
979
980        // Double backslash + pipe is NOT escaped
981        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
982        assert_eq!(cells.len(), 3);
983    }
984
985    #[test]
986    fn test_split_table_row_with_flavor_mkdocs() {
987        // MkDocs flavor: pipes in inline code are NOT cell delimiters
988        let cells =
989            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
990        assert_eq!(cells.len(), 2);
991        assert!(
992            cells[1].contains("`x | y`"),
993            "Inline code with pipe should be single cell in MkDocs flavor"
994        );
995
996        // Multiple pipes in inline code
997        let cells =
998            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
999        assert_eq!(cells.len(), 2);
1000        assert!(cells[1].contains("`a | b | c`"));
1001    }
1002
1003    #[test]
1004    fn test_split_table_row_with_flavor_standard() {
1005        // Standard/GFM flavor: pipes in inline code ARE cell delimiters
1006        let cells =
1007            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
1008        // In GFM, `x | y` splits into separate cells
1009        assert_eq!(cells.len(), 3);
1010    }
1011}