rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).
/// Represents a table block in the document.
///
/// As produced by `TableUtils::find_table_blocks_with_code_info`, every
/// position is a zero-based index into the lines of the scanned content
/// (the order yielded by `str::lines`).
///
/// `PartialEq`/`Eq` are derived so that detected tables can be compared
/// directly, e.g. with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableBlock {
    /// Index of the first line of the table (the header row).
    pub start_line: usize,
    /// Index of the last line of the table, inclusive: the delimiter row when
    /// the table has no body rows, otherwise the last body row.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row (e.g. `|---|---|`), directly below the header.
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in document order.
    pub content_lines: Vec<usize>,
}
14
/// Shared table detection utilities.
///
/// Stateless unit type used purely as a namespace: all functionality is
/// exposed as associated functions (`TableUtils::is_delimiter_row(..)` etc.).
/// The common traits are derived so the type can be debug-printed, copied and
/// default-constructed like any other public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct TableUtils;
17
18impl TableUtils {
19    /// Check if a line looks like a potential table row
20    pub fn is_potential_table_row(line: &str) -> bool {
21        let trimmed = line.trim();
22        if trimmed.is_empty() || !trimmed.contains('|') {
23            return false;
24        }
25
26        // Skip lines that are clearly not table rows
27        // Unordered list items with space or tab after marker
28        if trimmed.starts_with("- ")
29            || trimmed.starts_with("* ")
30            || trimmed.starts_with("+ ")
31            || trimmed.starts_with("-\t")
32            || trimmed.starts_with("*\t")
33            || trimmed.starts_with("+\t")
34        {
35            return false;
36        }
37
38        // Skip ordered list items: digits followed by . or ) then space/tab
39        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
40            && first_non_digit > 0
41        {
42            let after_digits = &trimmed[first_non_digit..];
43            if after_digits.starts_with(". ")
44                || after_digits.starts_with(".\t")
45                || after_digits.starts_with(") ")
46                || after_digits.starts_with(")\t")
47            {
48                return false;
49            }
50        }
51
52        // Skip lines that are clearly code or inline code
53        if trimmed.starts_with("`") || trimmed.contains("``") {
54            return false;
55        }
56
57        // Must have at least 2 parts when split by |
58        let parts: Vec<&str> = trimmed.split('|').collect();
59        if parts.len() < 2 {
60            return false;
61        }
62
63        // Check if it looks like a table row by having reasonable content between pipes
64        let mut valid_parts = 0;
65        let mut total_non_empty_parts = 0;
66
67        for part in &parts {
68            let part_trimmed = part.trim();
69            // Skip empty parts (from leading/trailing pipes)
70            if part_trimmed.is_empty() {
71                continue;
72            }
73            total_non_empty_parts += 1;
74
75            // Count parts that look like table cells (reasonable content, no newlines)
76            if !part_trimmed.contains('\n') {
77                valid_parts += 1;
78            }
79        }
80
81        // Check if all non-empty parts are valid (no newlines)
82        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
83            // Some cells contain newlines, not a valid table row
84            return false;
85        }
86
87        // GFM allows tables with all empty cells (e.g., |||)
88        // These are valid if they have proper table formatting (leading and trailing pipes)
89        if total_non_empty_parts == 0 {
90            // Empty cells are only valid with proper pipe formatting
91            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
92        }
93
94        // GFM allows single-column tables, so >= 1 valid part is enough
95        // when the line has proper table formatting (pipes)
96        if trimmed.starts_with('|') && trimmed.ends_with('|') {
97            // Properly formatted table row with pipes on both ends
98            valid_parts >= 1
99        } else {
100            // For rows without proper pipe formatting, require at least 2 cells
101            valid_parts >= 2
102        }
103    }
104
105    /// Check if a line is a table delimiter row (e.g., |---|---|)
106    pub fn is_delimiter_row(line: &str) -> bool {
107        let trimmed = line.trim();
108        if !trimmed.contains('|') || !trimmed.contains('-') {
109            return false;
110        }
111
112        // Split by pipes and check each part
113        let parts: Vec<&str> = trimmed.split('|').collect();
114        let mut valid_delimiter_parts = 0;
115        let mut total_non_empty_parts = 0;
116
117        for part in &parts {
118            let part_trimmed = part.trim();
119            if part_trimmed.is_empty() {
120                continue; // Skip empty parts from leading/trailing pipes
121            }
122
123            total_non_empty_parts += 1;
124
125            // Check if this part looks like a delimiter (contains dashes and optionally colons)
126            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
127                valid_delimiter_parts += 1;
128            }
129        }
130
131        // All non-empty parts must be valid delimiters, and there must be at least one
132        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
133    }
134
135    /// Strip blockquote prefix from a line, returning the content without the prefix
136    fn strip_blockquote_prefix(line: &str) -> &str {
137        let trimmed = line.trim_start();
138        if trimmed.starts_with('>') {
139            // Strip all blockquote markers and following space
140            let mut rest = trimmed;
141            while rest.starts_with('>') {
142                rest = rest.strip_prefix('>').unwrap_or(rest);
143                rest = rest.trim_start_matches(' ');
144            }
145            rest
146        } else {
147            line
148        }
149    }
150
151    /// Find all table blocks in the content with optimized detection
152    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
153    pub fn find_table_blocks_with_code_info(
154        content: &str,
155        code_blocks: &[(usize, usize)],
156        code_spans: &[crate::lint_context::CodeSpan],
157        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
158    ) -> Vec<TableBlock> {
159        let lines: Vec<&str> = content.lines().collect();
160        let mut tables = Vec::new();
161        let mut i = 0;
162
163        // Pre-compute line positions for efficient code block checking
164        let mut line_positions = Vec::with_capacity(lines.len());
165        let mut pos = 0;
166        for line in &lines {
167            line_positions.push(pos);
168            pos += line.len() + 1; // +1 for newline
169        }
170
171        while i < lines.len() {
172            // Skip lines in code blocks, code spans, or HTML comments
173            let line_start = line_positions[i];
174            let in_code =
175                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
176                    || code_spans
177                        .iter()
178                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
179            let in_html_comment = html_comment_ranges
180                .iter()
181                .any(|range| line_start >= range.start && line_start < range.end);
182
183            if in_code || in_html_comment {
184                i += 1;
185                continue;
186            }
187
188            // Strip blockquote prefix for table detection
189            let line_content = Self::strip_blockquote_prefix(lines[i]);
190
191            // Look for potential table start
192            if Self::is_potential_table_row(line_content) {
193                // Check if the next line is a delimiter row (also strip blockquote prefix)
194                let next_line_content = if i + 1 < lines.len() {
195                    Self::strip_blockquote_prefix(lines[i + 1])
196                } else {
197                    ""
198                };
199                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
200                    // Found a table! Find its end
201                    let table_start = i;
202                    let header_line = i;
203                    let delimiter_line = i + 1;
204                    let mut table_end = i + 1; // Include the delimiter row
205                    let mut content_lines = Vec::new();
206
207                    // Continue while we have table rows
208                    let mut j = i + 2;
209                    while j < lines.len() {
210                        let line = lines[j];
211                        // Strip blockquote prefix for checking
212                        let line_content = Self::strip_blockquote_prefix(line);
213                        if line_content.trim().is_empty() {
214                            // Empty line ends the table (including blockquote blank lines like ">")
215                            break;
216                        }
217                        if Self::is_potential_table_row(line_content) {
218                            content_lines.push(j);
219                            table_end = j;
220                            j += 1;
221                        } else {
222                            // Non-table line ends the table
223                            break;
224                        }
225                    }
226
227                    tables.push(TableBlock {
228                        start_line: table_start,
229                        end_line: table_end,
230                        header_line,
231                        delimiter_line,
232                        content_lines,
233                    });
234                    i = table_end + 1;
235                } else {
236                    i += 1;
237                }
238            } else {
239                i += 1;
240            }
241        }
242
243        tables
244    }
245
246    /// Find all table blocks in the content with optimized detection
247    /// This is a backward-compatible wrapper that accepts LintContext
248    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
249        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
250    }
251
252    /// Count the number of cells in a table row
253    pub fn count_cells(row: &str) -> usize {
254        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
255    }
256
257    /// Count the number of cells in a table row with flavor-specific behavior
258    ///
259    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
260    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
261    ///
262    /// This function strips blockquote prefixes before counting cells, so it works
263    /// correctly for tables inside blockquotes.
264    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
265        // Strip blockquote prefix if present before counting cells
266        let (_, content) = Self::extract_blockquote_prefix(row);
267        Self::split_table_row_with_flavor(content, flavor).len()
268    }
269
270    /// Mask pipes inside inline code blocks with a placeholder character
271    pub fn mask_pipes_in_inline_code(text: &str) -> String {
272        let mut result = String::new();
273        let chars: Vec<char> = text.chars().collect();
274        let mut i = 0;
275
276        while i < chars.len() {
277            if chars[i] == '`' {
278                // Count consecutive backticks at start
279                let start = i;
280                let mut backtick_count = 0;
281                while i < chars.len() && chars[i] == '`' {
282                    backtick_count += 1;
283                    i += 1;
284                }
285
286                // Look for matching closing backticks
287                let mut found_closing = false;
288                let mut j = i;
289
290                while j < chars.len() {
291                    if chars[j] == '`' {
292                        // Count potential closing backticks
293                        let close_start = j;
294                        let mut close_count = 0;
295                        while j < chars.len() && chars[j] == '`' {
296                            close_count += 1;
297                            j += 1;
298                        }
299
300                        if close_count == backtick_count {
301                            // Found matching closing backticks
302                            found_closing = true;
303
304                            // Valid inline code - add with pipes masked
305                            result.extend(chars[start..i].iter());
306
307                            for &ch in chars.iter().take(close_start).skip(i) {
308                                if ch == '|' {
309                                    result.push('_'); // Mask pipe with underscore
310                                } else {
311                                    result.push(ch);
312                                }
313                            }
314
315                            result.extend(chars[close_start..j].iter());
316                            i = j;
317                            break;
318                        }
319                        // If not matching, continue searching (j is already past these backticks)
320                    } else {
321                        j += 1;
322                    }
323                }
324
325                if !found_closing {
326                    // No matching closing found, treat as regular text
327                    result.extend(chars[start..i].iter());
328                }
329            } else {
330                result.push(chars[i]);
331                i += 1;
332            }
333        }
334
335        result
336    }
337
338    /// Escape pipes inside inline code blocks with backslash.
339    /// Converts `|` to `\|` inside backtick spans.
340    /// Used by auto-fix to preserve content while making tables valid.
341    pub fn escape_pipes_in_inline_code(text: &str) -> String {
342        let mut result = String::new();
343        let chars: Vec<char> = text.chars().collect();
344        let mut i = 0;
345
346        while i < chars.len() {
347            if chars[i] == '`' {
348                let start = i;
349                let mut backtick_count = 0;
350                while i < chars.len() && chars[i] == '`' {
351                    backtick_count += 1;
352                    i += 1;
353                }
354
355                let mut found_closing = false;
356                let mut j = i;
357
358                while j < chars.len() {
359                    if chars[j] == '`' {
360                        let close_start = j;
361                        let mut close_count = 0;
362                        while j < chars.len() && chars[j] == '`' {
363                            close_count += 1;
364                            j += 1;
365                        }
366
367                        if close_count == backtick_count {
368                            found_closing = true;
369                            result.extend(chars[start..i].iter());
370
371                            for &ch in chars.iter().take(close_start).skip(i) {
372                                if ch == '|' {
373                                    result.push('\\');
374                                    result.push('|');
375                                } else {
376                                    result.push(ch);
377                                }
378                            }
379
380                            result.extend(chars[close_start..j].iter());
381                            i = j;
382                            break;
383                        }
384                    } else {
385                        j += 1;
386                    }
387                }
388
389                if !found_closing {
390                    result.extend(chars[start..i].iter());
391                }
392            } else {
393                result.push(chars[i]);
394                i += 1;
395            }
396        }
397
398        result
399    }
400
401    /// Mask escaped pipes for accurate table cell parsing
402    ///
403    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
404    /// - `\|` → escaped pipe → masked (stays as cell content)
405    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
406    ///
407    /// IMPORTANT: Inline code spans do NOT protect pipes in GFM tables!
408    /// The pipe in `` `a | b` `` still acts as a cell delimiter, splitting into
409    /// two cells: `` `a `` and ` b` ``. This matches GitHub's actual rendering.
410    ///
411    /// To include a literal pipe in a table cell (even in code), you must escape it:
412    /// `` `a \| b` `` → single cell containing `a | b` (with code formatting)
413    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
414        let mut result = String::new();
415        let chars: Vec<char> = text.chars().collect();
416        let mut i = 0;
417
418        while i < chars.len() {
419            if chars[i] == '\\' {
420                if i + 1 < chars.len() && chars[i + 1] == '\\' {
421                    // Escaped backslash: \\ → push both and continue
422                    // The next character (if it's a pipe) will be a real delimiter
423                    result.push('\\');
424                    result.push('\\');
425                    i += 2;
426                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
427                    // Escaped pipe: \| → mask the pipe
428                    result.push('\\');
429                    result.push('_'); // Mask the pipe
430                    i += 2;
431                } else {
432                    // Single backslash not followed by \ or | → just push it
433                    result.push(chars[i]);
434                    i += 1;
435                }
436            } else {
437                result.push(chars[i]);
438                i += 1;
439            }
440        }
441
442        result
443    }
444
445    /// Split a table row into individual cell contents with flavor-specific behavior.
446    ///
447    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
448    /// This is the foundation for both cell counting and cell content extraction.
449    ///
450    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
451    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
452    pub fn split_table_row_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> Vec<String> {
453        let trimmed = row.trim();
454
455        if !trimmed.contains('|') {
456            return Vec::new();
457        }
458
459        // First, mask escaped pipes (same for all flavors)
460        let masked = Self::mask_pipes_for_table_parsing(trimmed);
461
462        // For MkDocs flavor, also mask pipes inside inline code
463        let final_masked = if flavor == crate::config::MarkdownFlavor::MkDocs {
464            Self::mask_pipes_in_inline_code(&masked)
465        } else {
466            masked
467        };
468
469        let has_leading = final_masked.starts_with('|');
470        let has_trailing = final_masked.ends_with('|');
471
472        let mut masked_content = final_masked.as_str();
473        let mut orig_content = trimmed;
474
475        if has_leading {
476            masked_content = &masked_content[1..];
477            orig_content = &orig_content[1..];
478        }
479
480        // Track whether we actually strip a trailing pipe
481        let stripped_trailing = has_trailing && !masked_content.is_empty();
482        if stripped_trailing {
483            masked_content = &masked_content[..masked_content.len() - 1];
484            orig_content = &orig_content[..orig_content.len() - 1];
485        }
486
487        // Handle edge cases for degenerate inputs
488        if masked_content.is_empty() {
489            if stripped_trailing {
490                // "||" case: two pipes with empty content between = one empty cell
491                return vec![String::new()];
492            } else {
493                // "|" case: single pipe, not a valid table row
494                return Vec::new();
495            }
496        }
497
498        let masked_parts: Vec<&str> = masked_content.split('|').collect();
499        let mut cells = Vec::new();
500        let mut pos = 0;
501
502        for masked_cell in masked_parts {
503            let cell_len = masked_cell.len();
504            let orig_cell = if pos + cell_len <= orig_content.len() {
505                &orig_content[pos..pos + cell_len]
506            } else {
507                masked_cell
508            };
509            cells.push(orig_cell.to_string());
510            pos += cell_len + 1; // +1 for the pipe delimiter
511        }
512
513        cells
514    }
515
516    /// Split a table row into individual cell contents using Standard/GFM behavior.
517    pub fn split_table_row(row: &str) -> Vec<String> {
518        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
519    }
520
521    /// Determine the pipe style of a table row
522    ///
523    /// Handles tables inside blockquotes by stripping the blockquote prefix
524    /// before analyzing the pipe style.
525    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
526        // Strip blockquote prefix if present before analyzing pipe style
527        let content = Self::strip_blockquote_prefix(line);
528        let trimmed = content.trim();
529        if !trimmed.contains('|') {
530            return None;
531        }
532
533        let has_leading = trimmed.starts_with('|');
534        let has_trailing = trimmed.ends_with('|');
535
536        match (has_leading, has_trailing) {
537            (true, true) => Some("leading_and_trailing"),
538            (true, false) => Some("leading_only"),
539            (false, true) => Some("trailing_only"),
540            (false, false) => Some("no_leading_or_trailing"),
541        }
542    }
543
544    /// Extract blockquote prefix from a line, returning (prefix, content).
545    ///
546    /// This is useful for stripping the prefix before processing, then restoring it after.
547    /// For example: `"> | H1 | H2 |"` returns `("> ", "| H1 | H2 |")`.
548    pub fn extract_blockquote_prefix(line: &str) -> (&str, &str) {
549        // Find where the actual content starts (after blockquote markers and spaces)
550        let bytes = line.as_bytes();
551        let mut pos = 0;
552
553        // Skip leading whitespace (indent before blockquote marker)
554        while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
555            pos += 1;
556        }
557
558        // If no blockquote marker, return empty prefix
559        if pos >= bytes.len() || bytes[pos] != b'>' {
560            return ("", line);
561        }
562
563        // Skip all blockquote markers and spaces
564        while pos < bytes.len() {
565            if bytes[pos] == b'>' {
566                pos += 1;
567                // Skip optional space after >
568                if pos < bytes.len() && bytes[pos] == b' ' {
569                    pos += 1;
570                }
571            } else if bytes[pos] == b' ' || bytes[pos] == b'\t' {
572                pos += 1;
573            } else {
574                break;
575            }
576        }
577
578        // Split at the position where content starts
579        (&line[..pos], &line[pos..])
580    }
581}
582
583#[cfg(test)]
584mod tests {
585    use super::*;
586    use crate::lint_context::LintContext;
587
588    #[test]
589    fn test_is_potential_table_row() {
590        // Basic valid table rows
591        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
592        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
593        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
594        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
595
596        // Multiple cells
597        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
598
599        // With whitespace
600        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
601        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
602
603        // Not table rows
604        assert!(!TableUtils::is_potential_table_row("- List item"));
605        assert!(!TableUtils::is_potential_table_row("* Another list"));
606        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
607        assert!(!TableUtils::is_potential_table_row("Regular text"));
608        assert!(!TableUtils::is_potential_table_row(""));
609        assert!(!TableUtils::is_potential_table_row("   "));
610
611        // Code blocks
612        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
613        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
614
615        // Single pipe not enough
616        assert!(!TableUtils::is_potential_table_row("Just one |"));
617        assert!(!TableUtils::is_potential_table_row("| Just one"));
618
619        // Very long cells are valid in tables (no length limit for cell content)
620        let long_cell = "a".repeat(150);
621        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
622
623        // Cells with newlines
624        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
625
626        // Empty cells (Issue #129)
627        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
628        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
629        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
630    }
631
632    #[test]
633    fn test_list_items_with_pipes_not_table_rows() {
634        // Ordered list items should NOT be detected as table rows
635        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
636        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
637        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
638        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
639        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
640
641        // Unordered list items with tabs
642        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
643        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
644        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
645
646        // Indented list items (the trim_start normalizes indentation)
647        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
648        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
649        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
650
651        // Task list items
652        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
653        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
654
655        // Multiple pipes in list items
656        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
657        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
658
659        // These SHOULD still be detected as potential table rows
660        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
661        assert!(TableUtils::is_potential_table_row("cell | cell"));
662        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
663    }
664
665    #[test]
666    fn test_is_delimiter_row() {
667        // Basic delimiter rows
668        assert!(TableUtils::is_delimiter_row("|---|---|"));
669        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
670        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
671        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
672
673        // With varying dash counts
674        assert!(TableUtils::is_delimiter_row("|-|--|"));
675        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
676
677        // With whitespace
678        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
679        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
680
681        // Multiple columns
682        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
683
684        // Without leading/trailing pipes
685        assert!(TableUtils::is_delimiter_row("--- | ---"));
686        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
687
688        // Not delimiter rows
689        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
690        assert!(!TableUtils::is_delimiter_row("Regular text"));
691        assert!(!TableUtils::is_delimiter_row(""));
692        assert!(!TableUtils::is_delimiter_row("|||"));
693        assert!(!TableUtils::is_delimiter_row("| | |"));
694
695        // Must have dashes
696        assert!(!TableUtils::is_delimiter_row("| : | : |"));
697        assert!(!TableUtils::is_delimiter_row("|    |    |"));
698
699        // Mixed content
700        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
701        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
702    }
703
704    #[test]
705    fn test_count_cells() {
706        // Basic counts
707        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
708        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
709        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
710        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
711
712        // Single cell
713        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
714        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
715
716        // Empty cells
717        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
718        assert_eq!(TableUtils::count_cells("| | | |"), 3);
719
720        // Many cells
721        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
722
723        // Edge cases
724        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
725        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
726
727        // No table
728        assert_eq!(TableUtils::count_cells("Regular text"), 0);
729        assert_eq!(TableUtils::count_cells(""), 0);
730        assert_eq!(TableUtils::count_cells("   "), 0);
731
732        // Whitespace handling
733        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
734        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
735    }
736
737    #[test]
738    fn test_count_cells_with_escaped_pipes() {
739        // In GFM tables, escape handling happens BEFORE cell splitting.
740        // Inline code does NOT protect pipes - they still act as cell delimiters.
741        // To include a literal pipe in a table cell, you MUST escape it with \|
742
743        // Basic table structure
744        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
745        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
746        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
747
748        // Escaped pipes: \| keeps the pipe as content
749        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
750        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
751
752        // Escaped pipes inside backticks (correct way to include | in code in tables)
753        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
754
755        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
756        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
757        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 3); // Same inside code
758
759        // IMPORTANT: Bare pipes in inline code DO act as delimiters (GFM behavior)
760        // This matches GitHub's actual rendering where `a | b` splits into two cells
761        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 3);
762        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 4);
763        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 2);
764
765        // The regex example from Issue #34 - pipes in regex patterns need escaping
766        // Unescaped: `^([0-1]?\d|2[0-3])` has a bare | which splits cells
767        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 3);
768        // Escaped: `^([0-1]?\d\|2[0-3])` keeps the | as part of the regex
769        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
770    }
771
772    #[test]
773    fn test_determine_pipe_style() {
774        // All pipe styles
775        assert_eq!(
776            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
777            Some("leading_and_trailing")
778        );
779        assert_eq!(
780            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
781            Some("leading_only")
782        );
783        assert_eq!(
784            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
785            Some("trailing_only")
786        );
787        assert_eq!(
788            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
789            Some("no_leading_or_trailing")
790        );
791
792        // With whitespace
793        assert_eq!(
794            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
795            Some("leading_and_trailing")
796        );
797        assert_eq!(
798            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
799            Some("leading_only")
800        );
801
802        // No pipes
803        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
804        assert_eq!(TableUtils::determine_pipe_style(""), None);
805        assert_eq!(TableUtils::determine_pipe_style("   "), None);
806
807        // Single pipe cases
808        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
809        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
810        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
811    }
812
813    #[test]
814    fn test_find_table_blocks_simple() {
815        let content = "| Header 1 | Header 2 |
816|-----------|-----------|
817| Cell 1    | Cell 2    |
818| Cell 3    | Cell 4    |";
819
820        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
821
822        let tables = TableUtils::find_table_blocks(content, &ctx);
823        assert_eq!(tables.len(), 1);
824
825        let table = &tables[0];
826        assert_eq!(table.start_line, 0);
827        assert_eq!(table.end_line, 3);
828        assert_eq!(table.header_line, 0);
829        assert_eq!(table.delimiter_line, 1);
830        assert_eq!(table.content_lines, vec![2, 3]);
831    }
832
833    #[test]
834    fn test_find_table_blocks_multiple() {
835        let content = "Some text
836
837| Table 1 | Col A |
838|----------|-------|
839| Data 1   | Val 1 |
840
841More text
842
843| Table 2 | Col 2 |
844|----------|-------|
845| Data 2   | Data  |";
846
847        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
848
849        let tables = TableUtils::find_table_blocks(content, &ctx);
850        assert_eq!(tables.len(), 2);
851
852        // First table
853        assert_eq!(tables[0].start_line, 2);
854        assert_eq!(tables[0].end_line, 4);
855        assert_eq!(tables[0].header_line, 2);
856        assert_eq!(tables[0].delimiter_line, 3);
857        assert_eq!(tables[0].content_lines, vec![4]);
858
859        // Second table
860        assert_eq!(tables[1].start_line, 8);
861        assert_eq!(tables[1].end_line, 10);
862        assert_eq!(tables[1].header_line, 8);
863        assert_eq!(tables[1].delimiter_line, 9);
864        assert_eq!(tables[1].content_lines, vec![10]);
865    }
866
867    #[test]
868    fn test_find_table_blocks_no_content_rows() {
869        let content = "| Header 1 | Header 2 |
870|-----------|-----------|
871
872Next paragraph";
873
874        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
875
876        let tables = TableUtils::find_table_blocks(content, &ctx);
877        assert_eq!(tables.len(), 1);
878
879        let table = &tables[0];
880        assert_eq!(table.start_line, 0);
881        assert_eq!(table.end_line, 1); // Just header and delimiter
882        assert_eq!(table.content_lines.len(), 0);
883    }
884
885    #[test]
886    fn test_find_table_blocks_in_code_block() {
887        let content = "```
888| Not | A | Table |
889|-----|---|-------|
890| In  | Code | Block |
891```
892
893| Real | Table |
894|------|-------|
895| Data | Here  |";
896
897        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
898
899        let tables = TableUtils::find_table_blocks(content, &ctx);
900        assert_eq!(tables.len(), 1); // Only the table outside code block
901
902        let table = &tables[0];
903        assert_eq!(table.header_line, 6);
904        assert_eq!(table.delimiter_line, 7);
905    }
906
907    #[test]
908    fn test_find_table_blocks_no_tables() {
909        let content = "Just regular text
910No tables here
911- List item with | pipe
912* Another list item";
913
914        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
915
916        let tables = TableUtils::find_table_blocks(content, &ctx);
917        assert_eq!(tables.len(), 0);
918    }
919
920    #[test]
921    fn test_find_table_blocks_malformed() {
922        let content = "| Header without delimiter |
923| This looks like table |
924But no delimiter row
925
926| Proper | Table |
927|---------|-------|
928| Data    | Here  |";
929
930        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
931
932        let tables = TableUtils::find_table_blocks(content, &ctx);
933        assert_eq!(tables.len(), 1); // Only the proper table
934        assert_eq!(tables[0].header_line, 4);
935    }
936
937    #[test]
938    fn test_edge_cases() {
939        // Test empty content
940        assert!(!TableUtils::is_potential_table_row(""));
941        assert!(!TableUtils::is_delimiter_row(""));
942        assert_eq!(TableUtils::count_cells(""), 0);
943        assert_eq!(TableUtils::determine_pipe_style(""), None);
944
945        // Test whitespace only
946        assert!(!TableUtils::is_potential_table_row("   "));
947        assert!(!TableUtils::is_delimiter_row("   "));
948        assert_eq!(TableUtils::count_cells("   "), 0);
949        assert_eq!(TableUtils::determine_pipe_style("   "), None);
950
951        // Test single character
952        assert!(!TableUtils::is_potential_table_row("|"));
953        assert!(!TableUtils::is_delimiter_row("|"));
954        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
955
956        // Test very long lines are valid table rows (no length limit)
957        // Test both single-column and multi-column long lines
958        let long_single = format!("| {} |", "a".repeat(200));
959        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
960
961        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
962        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
963
964        // Test unicode
965        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
966        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
967        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
968    }
969
970    #[test]
971    fn test_table_block_struct() {
972        let block = TableBlock {
973            start_line: 0,
974            end_line: 5,
975            header_line: 0,
976            delimiter_line: 1,
977            content_lines: vec![2, 3, 4, 5],
978        };
979
980        // Test Debug trait
981        let debug_str = format!("{block:?}");
982        assert!(debug_str.contains("TableBlock"));
983        assert!(debug_str.contains("start_line: 0"));
984
985        // Test Clone trait
986        let cloned = block.clone();
987        assert_eq!(cloned.start_line, block.start_line);
988        assert_eq!(cloned.end_line, block.end_line);
989        assert_eq!(cloned.header_line, block.header_line);
990        assert_eq!(cloned.delimiter_line, block.delimiter_line);
991        assert_eq!(cloned.content_lines, block.content_lines);
992    }
993
994    #[test]
995    fn test_split_table_row() {
996        // Basic split
997        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
998        assert_eq!(cells.len(), 3);
999        assert_eq!(cells[0].trim(), "Cell 1");
1000        assert_eq!(cells[1].trim(), "Cell 2");
1001        assert_eq!(cells[2].trim(), "Cell 3");
1002
1003        // Without trailing pipe
1004        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
1005        assert_eq!(cells.len(), 2);
1006
1007        // Empty cells
1008        let cells = TableUtils::split_table_row("| | | |");
1009        assert_eq!(cells.len(), 3);
1010
1011        // Single cell
1012        let cells = TableUtils::split_table_row("| Cell |");
1013        assert_eq!(cells.len(), 1);
1014        assert_eq!(cells[0].trim(), "Cell");
1015
1016        // No pipes
1017        let cells = TableUtils::split_table_row("No pipes here");
1018        assert_eq!(cells.len(), 0);
1019    }
1020
1021    #[test]
1022    fn test_split_table_row_with_escaped_pipes() {
1023        // Escaped pipes should be preserved in cell content
1024        let cells = TableUtils::split_table_row(r"| A | B \| C |");
1025        assert_eq!(cells.len(), 2);
1026        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
1027
1028        // Double backslash + pipe is NOT escaped
1029        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
1030        assert_eq!(cells.len(), 3);
1031    }
1032
1033    #[test]
1034    fn test_split_table_row_with_flavor_mkdocs() {
1035        // MkDocs flavor: pipes in inline code are NOT cell delimiters
1036        let cells =
1037            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
1038        assert_eq!(cells.len(), 2);
1039        assert!(
1040            cells[1].contains("`x | y`"),
1041            "Inline code with pipe should be single cell in MkDocs flavor"
1042        );
1043
1044        // Multiple pipes in inline code
1045        let cells =
1046            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
1047        assert_eq!(cells.len(), 2);
1048        assert!(cells[1].contains("`a | b | c`"));
1049    }
1050
1051    #[test]
1052    fn test_split_table_row_with_flavor_standard() {
1053        // Standard/GFM flavor: pipes in inline code ARE cell delimiters
1054        let cells =
1055            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
1056        // In GFM, `x | y` splits into separate cells
1057        assert_eq!(cells.len(), 3);
1058    }
1059
1060    // === extract_blockquote_prefix tests ===
1061
1062    #[test]
1063    fn test_extract_blockquote_prefix_no_blockquote() {
1064        // Regular table row without blockquote
1065        let (prefix, content) = TableUtils::extract_blockquote_prefix("| H1 | H2 |");
1066        assert_eq!(prefix, "");
1067        assert_eq!(content, "| H1 | H2 |");
1068    }
1069
1070    #[test]
1071    fn test_extract_blockquote_prefix_single_level() {
1072        // Single blockquote level
1073        let (prefix, content) = TableUtils::extract_blockquote_prefix("> | H1 | H2 |");
1074        assert_eq!(prefix, "> ");
1075        assert_eq!(content, "| H1 | H2 |");
1076    }
1077
1078    #[test]
1079    fn test_extract_blockquote_prefix_double_level() {
1080        // Double blockquote level
1081        let (prefix, content) = TableUtils::extract_blockquote_prefix(">> | H1 | H2 |");
1082        assert_eq!(prefix, ">> ");
1083        assert_eq!(content, "| H1 | H2 |");
1084    }
1085
1086    #[test]
1087    fn test_extract_blockquote_prefix_triple_level() {
1088        // Triple blockquote level
1089        let (prefix, content) = TableUtils::extract_blockquote_prefix(">>> | H1 | H2 |");
1090        assert_eq!(prefix, ">>> ");
1091        assert_eq!(content, "| H1 | H2 |");
1092    }
1093
1094    #[test]
1095    fn test_extract_blockquote_prefix_with_spaces() {
1096        // Blockquote with spaces between markers
1097        let (prefix, content) = TableUtils::extract_blockquote_prefix("> > | H1 | H2 |");
1098        assert_eq!(prefix, "> > ");
1099        assert_eq!(content, "| H1 | H2 |");
1100    }
1101
1102    #[test]
1103    fn test_extract_blockquote_prefix_indented() {
1104        // Indented blockquote
1105        let (prefix, content) = TableUtils::extract_blockquote_prefix("  > | H1 | H2 |");
1106        assert_eq!(prefix, "  > ");
1107        assert_eq!(content, "| H1 | H2 |");
1108    }
1109
1110    #[test]
1111    fn test_extract_blockquote_prefix_no_space_after() {
1112        // Blockquote without space after marker
1113        let (prefix, content) = TableUtils::extract_blockquote_prefix(">| H1 | H2 |");
1114        assert_eq!(prefix, ">");
1115        assert_eq!(content, "| H1 | H2 |");
1116    }
1117
1118    #[test]
1119    fn test_determine_pipe_style_in_blockquote() {
1120        // determine_pipe_style should handle blockquotes correctly
1121        assert_eq!(
1122            TableUtils::determine_pipe_style("> | H1 | H2 |"),
1123            Some("leading_and_trailing")
1124        );
1125        assert_eq!(
1126            TableUtils::determine_pipe_style("> H1 | H2"),
1127            Some("no_leading_or_trailing")
1128        );
1129        assert_eq!(
1130            TableUtils::determine_pipe_style(">> | H1 | H2 |"),
1131            Some("leading_and_trailing")
1132        );
1133        assert_eq!(TableUtils::determine_pipe_style(">>> | H1 | H2"), Some("leading_only"));
1134    }
1135}