rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that is shared across the table-related rules (MD055, MD056, MD058).
5/// Represents a table block in the document
6#[derive(Debug, Clone)]
7pub struct TableBlock {
8    pub start_line: usize,
9    pub end_line: usize,
10    pub header_line: usize,
11    pub delimiter_line: usize,
12    pub content_lines: Vec<usize>,
13    /// If the table is inside a list item, this contains:
14    /// - The list marker prefix for the header line (e.g., "- ", "1. ")
15    /// - The content indent (number of spaces for continuation lines)
16    pub list_context: Option<ListTableContext>,
17}
18
19/// Context information for tables inside list items
20#[derive(Debug, Clone)]
21pub struct ListTableContext {
22    /// The list marker prefix including any leading whitespace (e.g., "- ", "  1. ")
23    pub list_prefix: String,
24    /// Number of spaces for continuation lines to align with content
25    pub content_indent: usize,
26}
27
28/// Shared table detection utilities
29pub struct TableUtils;
30
31impl TableUtils {
32    /// Returns true if the line has at least one unescaped pipe separator outside inline code spans.
33    ///
34    /// This helps distinguish actual table separators from command/prose examples like
35    /// `` `echo a | sed 's/a/b/'` `` where the pipe is fully inside inline code.
36    fn has_unescaped_pipe_outside_inline_code(text: &str) -> bool {
37        let chars: Vec<char> = text.chars().collect();
38        let mut i = 0;
39        let mut in_code = false;
40        let mut code_delim_len = 0usize;
41
42        while i < chars.len() {
43            let ch = chars[i];
44
45            if ch == '\\' {
46                // Skip escaped character.
47                i += if i + 1 < chars.len() { 2 } else { 1 };
48                continue;
49            }
50
51            if ch == '`' {
52                let mut run = 1usize;
53                while i + run < chars.len() && chars[i + run] == '`' {
54                    run += 1;
55                }
56
57                if in_code {
58                    if run == code_delim_len {
59                        in_code = false;
60                        code_delim_len = 0;
61                    }
62                } else {
63                    in_code = true;
64                    code_delim_len = run;
65                }
66
67                i += run;
68                continue;
69            }
70
71            if ch == '|' && !in_code {
72                return true;
73            }
74
75            i += 1;
76        }
77
78        false
79    }
80
81    /// Check if a line looks like a potential table row
82    pub fn is_potential_table_row(line: &str) -> bool {
83        let trimmed = line.trim();
84        if trimmed.is_empty() || !trimmed.contains('|') {
85            return false;
86        }
87
88        // Skip lines that are clearly not table rows
89        // Unordered list items with space or tab after marker
90        if trimmed.starts_with("- ")
91            || trimmed.starts_with("* ")
92            || trimmed.starts_with("+ ")
93            || trimmed.starts_with("-\t")
94            || trimmed.starts_with("*\t")
95            || trimmed.starts_with("+\t")
96        {
97            return false;
98        }
99
100        // Skip ordered list items: digits followed by . or ) then space/tab
101        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
102            && first_non_digit > 0
103        {
104            let after_digits = &trimmed[first_non_digit..];
105            if after_digits.starts_with(". ")
106                || after_digits.starts_with(".\t")
107                || after_digits.starts_with(") ")
108                || after_digits.starts_with(")\t")
109            {
110                return false;
111            }
112        }
113
114        // Skip ATX headings (# through ######)
115        if trimmed.starts_with('#') {
116            let hash_count = trimmed.bytes().take_while(|&b| b == b'#').count();
117            if hash_count <= 6 {
118                let after_hashes = &trimmed[hash_count..];
119                if after_hashes.is_empty() || after_hashes.starts_with(' ') || after_hashes.starts_with('\t') {
120                    return false;
121                }
122            }
123        }
124
125        // For rows without explicit outer pipes, require a real separator outside
126        // inline code spans to avoid prose/command false positives.
127        let has_outer_pipes = trimmed.starts_with('|') && trimmed.ends_with('|');
128        if !has_outer_pipes && !Self::has_unescaped_pipe_outside_inline_code(trimmed) {
129            return false;
130        }
131
132        // Must have at least 2 parts when split by |
133        let parts: Vec<&str> = trimmed.split('|').collect();
134        if parts.len() < 2 {
135            return false;
136        }
137
138        // Check if it looks like a table row by having reasonable content between pipes
139        let mut valid_parts = 0;
140        let mut total_non_empty_parts = 0;
141
142        for part in &parts {
143            let part_trimmed = part.trim();
144            // Skip empty parts (from leading/trailing pipes)
145            if part_trimmed.is_empty() {
146                continue;
147            }
148            total_non_empty_parts += 1;
149
150            // Count parts that look like table cells (reasonable content, no newlines)
151            if !part_trimmed.contains('\n') {
152                valid_parts += 1;
153            }
154        }
155
156        // Check if all non-empty parts are valid (no newlines)
157        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
158            // Some cells contain newlines, not a valid table row
159            return false;
160        }
161
162        // GFM allows tables with all empty cells (e.g., |||)
163        // These are valid if they have proper table formatting (leading and trailing pipes)
164        if total_non_empty_parts == 0 {
165            // Empty cells are only valid with proper pipe formatting
166            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
167        }
168
169        // GFM allows single-column tables, so >= 1 valid part is enough
170        // when the line has proper table formatting (pipes)
171        if trimmed.starts_with('|') && trimmed.ends_with('|') {
172            // Properly formatted table row with pipes on both ends
173            valid_parts >= 1
174        } else {
175            // For rows without proper pipe formatting, require at least 2 cells
176            valid_parts >= 2
177        }
178    }
179
180    /// Check if a line is a table delimiter row (e.g., |---|---|)
181    pub fn is_delimiter_row(line: &str) -> bool {
182        let trimmed = line.trim();
183        if !trimmed.contains('|') || !trimmed.contains('-') {
184            return false;
185        }
186
187        // Split by pipes and check each part
188        let parts: Vec<&str> = trimmed.split('|').collect();
189        let mut valid_delimiter_parts = 0;
190        let mut total_non_empty_parts = 0;
191
192        for part in &parts {
193            let part_trimmed = part.trim();
194            if part_trimmed.is_empty() {
195                continue; // Skip empty parts from leading/trailing pipes
196            }
197
198            total_non_empty_parts += 1;
199
200            // Check if this part looks like a delimiter (contains dashes and optionally colons)
201            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
202                valid_delimiter_parts += 1;
203            }
204        }
205
206        // All non-empty parts must be valid delimiters, and there must be at least one
207        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
208    }
209
210    /// Strip blockquote prefix from a line, returning the content without the prefix
211    fn strip_blockquote_prefix(line: &str) -> &str {
212        let trimmed = line.trim_start();
213        if trimmed.starts_with('>') {
214            // Strip all blockquote markers and following space
215            let mut rest = trimmed;
216            while rest.starts_with('>') {
217                rest = rest.strip_prefix('>').unwrap_or(rest);
218                rest = rest.trim_start_matches(' ');
219            }
220            rest
221        } else {
222            line
223        }
224    }
225
226    /// Find all table blocks in the content with optimized detection
227    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
228    pub fn find_table_blocks_with_code_info(
229        content: &str,
230        code_blocks: &[(usize, usize)],
231        code_spans: &[crate::lint_context::CodeSpan],
232        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
233    ) -> Vec<TableBlock> {
234        let lines: Vec<&str> = content.lines().collect();
235        let mut tables = Vec::new();
236        let mut i = 0;
237
238        // Pre-compute line positions for efficient code block checking
239        let mut line_positions = Vec::with_capacity(lines.len());
240        let mut pos = 0;
241        for line in &lines {
242            line_positions.push(pos);
243            pos += line.len() + 1; // +1 for newline
244        }
245
246        // Stack of active list content indents for continuation table tracking.
247        // Supports nested lists: when a child list is seen, we push; when we
248        // dedent past a level, we pop back to the enclosing list.
249        let mut list_indent_stack: Vec<usize> = Vec::new();
250
251        while i < lines.len() {
252            // Skip lines in code blocks, code spans, or HTML comments
253            let line_start = line_positions[i];
254            let in_code =
255                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
256                    || code_spans
257                        .iter()
258                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
259            let in_html_comment = html_comment_ranges
260                .iter()
261                .any(|range| line_start >= range.start && line_start < range.end);
262
263            if in_code || in_html_comment {
264                i += 1;
265                continue;
266            }
267
268            // Strip blockquote prefix for table detection
269            let line_content = Self::strip_blockquote_prefix(lines[i]);
270
271            // Update active list tracking
272            let (list_prefix, list_content, content_indent) = Self::extract_list_prefix(line_content);
273            if !list_prefix.is_empty() {
274                // Line has a list marker. Pop any deeper/equal levels, then push this one.
275                while list_indent_stack.last().is_some_and(|&top| top >= content_indent) {
276                    list_indent_stack.pop();
277                }
278                list_indent_stack.push(content_indent);
279            } else if !line_content.trim().is_empty() {
280                // Non-blank line without a marker: pop any levels we've dedented past
281                let leading = line_content.len() - line_content.trim_start().len();
282                while list_indent_stack.last().is_some_and(|&top| leading < top) {
283                    list_indent_stack.pop();
284                }
285            }
286            // Blank lines keep the stack unchanged (blank lines don't end list items)
287
288            // Check if this is a list item that contains a table row on the same line,
289            // or a continuation table indented under an active list item
290            let (is_same_line_list_table, effective_content) =
291                if !list_prefix.is_empty() && Self::is_potential_table_row_content(list_content) {
292                    (true, list_content)
293                } else {
294                    (false, line_content)
295                };
296
297            // Detect continuation list tables: no marker on this line, but indented
298            // under an active list item (e.g., "- Text\n  | h1 | h2 |")
299            let continuation_indent = if !is_same_line_list_table && list_prefix.is_empty() {
300                let leading = line_content.len() - line_content.trim_start().len();
301                // Find the deepest list level this line is indented under
302                list_indent_stack
303                    .iter()
304                    .rev()
305                    .find(|&&indent| leading >= indent)
306                    .copied()
307            } else {
308                None
309            };
310
311            let is_continuation_list_table = continuation_indent.is_some()
312                && {
313                    let indent = continuation_indent.unwrap();
314                    let leading = line_content.len() - line_content.trim_start().len();
315                    // Per CommonMark, 4+ spaces beyond content indent is a code block
316                    leading < indent + 4
317                }
318                && Self::is_potential_table_row(effective_content);
319
320            let is_any_list_table = is_same_line_list_table || is_continuation_list_table;
321
322            // For continuation list tables, use the matched list indent
323            let effective_content_indent = if is_same_line_list_table {
324                content_indent
325            } else if is_continuation_list_table {
326                continuation_indent.unwrap()
327            } else {
328                0
329            };
330
331            // Look for potential table start
332            if is_any_list_table || Self::is_potential_table_row(effective_content) {
333                // For list tables (same-line or continuation), check indented continuation lines
334                // For regular tables, check the next line directly
335                let (next_line_content, delimiter_has_valid_indent) = if i + 1 < lines.len() {
336                    let next_raw = Self::strip_blockquote_prefix(lines[i + 1]);
337                    if is_any_list_table {
338                        // Verify the delimiter line has proper indentation
339                        let leading_spaces = next_raw.len() - next_raw.trim_start().len();
340                        if leading_spaces >= effective_content_indent {
341                            // Has proper indentation, strip it and check as delimiter
342                            (
343                                Self::strip_list_continuation_indent(next_raw, effective_content_indent),
344                                true,
345                            )
346                        } else {
347                            // Not enough indentation - not a list table
348                            (next_raw, false)
349                        }
350                    } else {
351                        (next_raw, true)
352                    }
353                } else {
354                    ("", true)
355                };
356
357                // For list tables, only accept if delimiter has valid indentation
358                let effective_is_list_table = is_any_list_table && delimiter_has_valid_indent;
359
360                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
361                    // Found a table! Find its end
362                    let table_start = i;
363                    let header_line = i;
364                    let delimiter_line = i + 1;
365                    let mut table_end = i + 1; // Include the delimiter row
366                    let mut content_lines = Vec::new();
367
368                    // Continue while we have table rows
369                    let mut j = i + 2;
370                    while j < lines.len() {
371                        let line = lines[j];
372                        // Strip blockquote prefix for checking
373                        let raw_content = Self::strip_blockquote_prefix(line);
374
375                        // For list tables, strip expected indentation
376                        let line_content = if effective_is_list_table {
377                            Self::strip_list_continuation_indent(raw_content, effective_content_indent)
378                        } else {
379                            raw_content
380                        };
381
382                        if line_content.trim().is_empty() {
383                            // Empty line ends the table
384                            break;
385                        }
386
387                        // For list tables, the continuation line must have proper indentation
388                        if effective_is_list_table {
389                            let leading_spaces = raw_content.len() - raw_content.trim_start().len();
390                            if leading_spaces < effective_content_indent {
391                                // Not enough indentation - end of table
392                                break;
393                            }
394                        }
395
396                        if Self::is_potential_table_row(line_content) {
397                            content_lines.push(j);
398                            table_end = j;
399                            j += 1;
400                        } else {
401                            // Non-table line ends the table
402                            break;
403                        }
404                    }
405
406                    let list_context = if effective_is_list_table {
407                        if is_same_line_list_table {
408                            // Same-line: prefix is the actual list marker (e.g., "- ")
409                            Some(ListTableContext {
410                                list_prefix: list_prefix.to_string(),
411                                content_indent: effective_content_indent,
412                            })
413                        } else {
414                            // Continuation: prefix is the indentation spaces
415                            Some(ListTableContext {
416                                list_prefix: " ".repeat(effective_content_indent),
417                                content_indent: effective_content_indent,
418                            })
419                        }
420                    } else {
421                        None
422                    };
423
424                    tables.push(TableBlock {
425                        start_line: table_start,
426                        end_line: table_end,
427                        header_line,
428                        delimiter_line,
429                        content_lines,
430                        list_context,
431                    });
432                    i = table_end + 1;
433                } else {
434                    i += 1;
435                }
436            } else {
437                i += 1;
438            }
439        }
440
441        tables
442    }
443
444    /// Strip list continuation indentation from a line.
445    /// For lines that are continuations of a list item's content, strip the expected indent.
446    fn strip_list_continuation_indent(line: &str, expected_indent: usize) -> &str {
447        let bytes = line.as_bytes();
448        let mut spaces = 0;
449
450        for &b in bytes {
451            if b == b' ' {
452                spaces += 1;
453            } else if b == b'\t' {
454                // Tab counts as up to 4 spaces, rounding up to next multiple of 4
455                spaces = (spaces / 4 + 1) * 4;
456            } else {
457                break;
458            }
459
460            if spaces >= expected_indent {
461                break;
462            }
463        }
464
465        // Strip at most expected_indent characters
466        let strip_count = spaces.min(expected_indent).min(line.len());
467        // Count actual bytes to strip (handling tabs)
468        let mut byte_count = 0;
469        let mut counted_spaces = 0;
470        for &b in bytes {
471            if counted_spaces >= strip_count {
472                break;
473            }
474            if b == b' ' {
475                counted_spaces += 1;
476                byte_count += 1;
477            } else if b == b'\t' {
478                counted_spaces = (counted_spaces / 4 + 1) * 4;
479                byte_count += 1;
480            } else {
481                break;
482            }
483        }
484
485        &line[byte_count..]
486    }
487
488    /// Find all table blocks in the content with optimized detection
489    /// This is a backward-compatible wrapper that accepts LintContext
490    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
491        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
492    }
493
494    /// Count the number of cells in a table row
495    pub fn count_cells(row: &str) -> usize {
496        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
497    }
498
499    /// Count the number of cells in a table row with flavor-specific behavior
500    ///
501    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
502    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
503    ///
504    /// This function strips blockquote prefixes before counting cells, so it works
505    /// correctly for tables inside blockquotes.
506    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
507        // Strip blockquote prefix if present before counting cells
508        let (_, content) = Self::extract_blockquote_prefix(row);
509        Self::split_table_row_with_flavor(content, flavor).len()
510    }
511
512    /// Mask pipes inside inline code blocks with a placeholder character
513    pub fn mask_pipes_in_inline_code(text: &str) -> String {
514        let mut result = String::new();
515        let chars: Vec<char> = text.chars().collect();
516        let mut i = 0;
517
518        while i < chars.len() {
519            if chars[i] == '`' {
520                // Count consecutive backticks at start
521                let start = i;
522                let mut backtick_count = 0;
523                while i < chars.len() && chars[i] == '`' {
524                    backtick_count += 1;
525                    i += 1;
526                }
527
528                // Look for matching closing backticks
529                let mut found_closing = false;
530                let mut j = i;
531
532                while j < chars.len() {
533                    if chars[j] == '`' {
534                        // Count potential closing backticks
535                        let close_start = j;
536                        let mut close_count = 0;
537                        while j < chars.len() && chars[j] == '`' {
538                            close_count += 1;
539                            j += 1;
540                        }
541
542                        if close_count == backtick_count {
543                            // Found matching closing backticks
544                            found_closing = true;
545
546                            // Valid inline code - add with pipes masked
547                            result.extend(chars[start..i].iter());
548
549                            for &ch in chars.iter().take(close_start).skip(i) {
550                                if ch == '|' {
551                                    result.push('_'); // Mask pipe with underscore
552                                } else {
553                                    result.push(ch);
554                                }
555                            }
556
557                            result.extend(chars[close_start..j].iter());
558                            i = j;
559                            break;
560                        }
561                        // If not matching, continue searching (j is already past these backticks)
562                    } else {
563                        j += 1;
564                    }
565                }
566
567                if !found_closing {
568                    // No matching closing found, treat as regular text
569                    result.extend(chars[start..i].iter());
570                }
571            } else {
572                result.push(chars[i]);
573                i += 1;
574            }
575        }
576
577        result
578    }
579
580    /// Escape pipes inside inline code blocks with backslash.
581    /// Converts `|` to `\|` inside backtick spans.
582    /// Used by auto-fix to preserve content while making tables valid.
583    pub fn escape_pipes_in_inline_code(text: &str) -> String {
584        let mut result = String::new();
585        let chars: Vec<char> = text.chars().collect();
586        let mut i = 0;
587
588        while i < chars.len() {
589            if chars[i] == '`' {
590                let start = i;
591                let mut backtick_count = 0;
592                while i < chars.len() && chars[i] == '`' {
593                    backtick_count += 1;
594                    i += 1;
595                }
596
597                let mut found_closing = false;
598                let mut j = i;
599
600                while j < chars.len() {
601                    if chars[j] == '`' {
602                        let close_start = j;
603                        let mut close_count = 0;
604                        while j < chars.len() && chars[j] == '`' {
605                            close_count += 1;
606                            j += 1;
607                        }
608
609                        if close_count == backtick_count {
610                            found_closing = true;
611                            result.extend(chars[start..i].iter());
612
613                            for &ch in chars.iter().take(close_start).skip(i) {
614                                if ch == '|' {
615                                    result.push('\\');
616                                    result.push('|');
617                                } else {
618                                    result.push(ch);
619                                }
620                            }
621
622                            result.extend(chars[close_start..j].iter());
623                            i = j;
624                            break;
625                        }
626                    } else {
627                        j += 1;
628                    }
629                }
630
631                if !found_closing {
632                    result.extend(chars[start..i].iter());
633                }
634            } else {
635                result.push(chars[i]);
636                i += 1;
637            }
638        }
639
640        result
641    }
642
643    /// Mask escaped pipes for accurate table cell parsing
644    ///
645    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
646    /// - `\|` → escaped pipe → masked (stays as cell content)
647    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
648    ///
649    /// IMPORTANT: Inline code spans do NOT protect pipes in GFM tables!
650    /// The pipe in `` `a | b` `` still acts as a cell delimiter, splitting into
651    /// two cells: `` `a `` and ` b` ``. This matches GitHub's actual rendering.
652    ///
653    /// To include a literal pipe in a table cell (even in code), you must escape it:
654    /// `` `a \| b` `` → single cell containing `a | b` (with code formatting)
655    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
656        let mut result = String::new();
657        let chars: Vec<char> = text.chars().collect();
658        let mut i = 0;
659
660        while i < chars.len() {
661            if chars[i] == '\\' {
662                if i + 1 < chars.len() && chars[i + 1] == '\\' {
663                    // Escaped backslash: \\ → push both and continue
664                    // The next character (if it's a pipe) will be a real delimiter
665                    result.push('\\');
666                    result.push('\\');
667                    i += 2;
668                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
669                    // Escaped pipe: \| → mask the pipe
670                    result.push('\\');
671                    result.push('_'); // Mask the pipe
672                    i += 2;
673                } else {
674                    // Single backslash not followed by \ or | → just push it
675                    result.push(chars[i]);
676                    i += 1;
677                }
678            } else {
679                result.push(chars[i]);
680                i += 1;
681            }
682        }
683
684        result
685    }
686
687    /// Split a table row into individual cell contents with flavor-specific behavior.
688    ///
689    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
690    /// This is the foundation for both cell counting and cell content extraction.
691    ///
692    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
693    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
694    pub fn split_table_row_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> Vec<String> {
695        let trimmed = row.trim();
696
697        if !trimmed.contains('|') {
698            return Vec::new();
699        }
700
701        // First, mask escaped pipes (same for all flavors)
702        let masked = Self::mask_pipes_for_table_parsing(trimmed);
703
704        // For MkDocs flavor, also mask pipes inside inline code
705        let final_masked = if flavor == crate::config::MarkdownFlavor::MkDocs {
706            Self::mask_pipes_in_inline_code(&masked)
707        } else {
708            masked
709        };
710
711        let has_leading = final_masked.starts_with('|');
712        let has_trailing = final_masked.ends_with('|');
713
714        let mut masked_content = final_masked.as_str();
715        let mut orig_content = trimmed;
716
717        if has_leading {
718            masked_content = &masked_content[1..];
719            orig_content = &orig_content[1..];
720        }
721
722        // Track whether we actually strip a trailing pipe
723        let stripped_trailing = has_trailing && !masked_content.is_empty();
724        if stripped_trailing {
725            masked_content = &masked_content[..masked_content.len() - 1];
726            orig_content = &orig_content[..orig_content.len() - 1];
727        }
728
729        // Handle edge cases for degenerate inputs
730        if masked_content.is_empty() {
731            if stripped_trailing {
732                // "||" case: two pipes with empty content between = one empty cell
733                return vec![String::new()];
734            } else {
735                // "|" case: single pipe, not a valid table row
736                return Vec::new();
737            }
738        }
739
740        let masked_parts: Vec<&str> = masked_content.split('|').collect();
741        let mut cells = Vec::new();
742        let mut pos = 0;
743
744        for masked_cell in masked_parts {
745            let cell_len = masked_cell.len();
746            let orig_cell = if pos + cell_len <= orig_content.len() {
747                &orig_content[pos..pos + cell_len]
748            } else {
749                masked_cell
750            };
751            cells.push(orig_cell.to_string());
752            pos += cell_len + 1; // +1 for the pipe delimiter
753        }
754
755        cells
756    }
757
758    /// Split a table row into individual cell contents using Standard/GFM behavior.
759    pub fn split_table_row(row: &str) -> Vec<String> {
760        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
761    }
762
763    /// Determine the pipe style of a table row
764    ///
765    /// Handles tables inside blockquotes by stripping the blockquote prefix
766    /// before analyzing the pipe style.
767    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
768        // Strip blockquote prefix if present before analyzing pipe style
769        let content = Self::strip_blockquote_prefix(line);
770        let trimmed = content.trim();
771        if !trimmed.contains('|') {
772            return None;
773        }
774
775        let has_leading = trimmed.starts_with('|');
776        let has_trailing = trimmed.ends_with('|');
777
778        match (has_leading, has_trailing) {
779            (true, true) => Some("leading_and_trailing"),
780            (true, false) => Some("leading_only"),
781            (false, true) => Some("trailing_only"),
782            (false, false) => Some("no_leading_or_trailing"),
783        }
784    }
785
786    /// Extract blockquote prefix from a line, returning (prefix, content).
787    ///
788    /// This is useful for stripping the prefix before processing, then restoring it after.
789    /// For example: `"> | H1 | H2 |"` returns `("> ", "| H1 | H2 |")`.
790    pub fn extract_blockquote_prefix(line: &str) -> (&str, &str) {
791        // Find where the actual content starts (after blockquote markers and spaces)
792        let bytes = line.as_bytes();
793        let mut pos = 0;
794
795        // Skip leading whitespace (indent before blockquote marker)
796        while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
797            pos += 1;
798        }
799
800        // If no blockquote marker, return empty prefix
801        if pos >= bytes.len() || bytes[pos] != b'>' {
802            return ("", line);
803        }
804
805        // Skip all blockquote markers and spaces
806        while pos < bytes.len() {
807            if bytes[pos] == b'>' {
808                pos += 1;
809                // Skip optional space after >
810                if pos < bytes.len() && bytes[pos] == b' ' {
811                    pos += 1;
812                }
813            } else if bytes[pos] == b' ' || bytes[pos] == b'\t' {
814                pos += 1;
815            } else {
816                break;
817            }
818        }
819
820        // Split at the position where content starts
821        (&line[..pos], &line[pos..])
822    }
823
824    /// Extract list marker prefix from a line, returning (prefix, content, content_indent).
825    ///
826    /// This handles unordered list markers (`-`, `*`, `+`) and ordered list markers (`1.`, `10)`, etc.)
827    /// Returns:
828    /// - prefix: The list marker including any leading whitespace and trailing space (e.g., "- ", "  1. ")
829    /// - content: The content after the list marker
830    /// - content_indent: The number of spaces needed for continuation lines to align with content
831    ///
832    /// For example:
833    /// - `"- | H1 | H2 |"` returns `("- ", "| H1 | H2 |", 2)`
834    /// - `"1. | H1 | H2 |"` returns `("1. ", "| H1 | H2 |", 3)`
835    /// - `"  - table"` returns `("  - ", "table", 4)`
836    ///
837    /// Returns `("", line, 0)` if the line doesn't start with a list marker.
838    pub fn extract_list_prefix(line: &str) -> (&str, &str, usize) {
839        let bytes = line.as_bytes();
840
841        // Skip leading whitespace
842        let leading_spaces = bytes.iter().take_while(|&&b| b == b' ' || b == b'\t').count();
843        let mut pos = leading_spaces;
844
845        if pos >= bytes.len() {
846            return ("", line, 0);
847        }
848
849        // Check for unordered list marker: -, *, +
850        if matches!(bytes[pos], b'-' | b'*' | b'+') {
851            pos += 1;
852
853            // Must be followed by space or tab (or end of line for marker-only lines)
854            if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
855                // Skip the space after marker if present
856                if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
857                    pos += 1;
858                }
859                let content_indent = pos;
860                return (&line[..pos], &line[pos..], content_indent);
861            }
862            // Not a list marker (e.g., "-word" or "--")
863            return ("", line, 0);
864        }
865
866        // Check for ordered list marker: digits followed by . or ) then space
867        if bytes[pos].is_ascii_digit() {
868            let digit_start = pos;
869            while pos < bytes.len() && bytes[pos].is_ascii_digit() {
870                pos += 1;
871            }
872
873            // Must have at least one digit
874            if pos > digit_start && pos < bytes.len() {
875                // Check for . or ) followed by space/tab
876                if bytes[pos] == b'.' || bytes[pos] == b')' {
877                    pos += 1;
878                    if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
879                        // Skip the space after marker if present
880                        if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
881                            pos += 1;
882                        }
883                        let content_indent = pos;
884                        return (&line[..pos], &line[pos..], content_indent);
885                    }
886                }
887            }
888        }
889
890        ("", line, 0)
891    }
892
893    /// Extract the table row content from a line, stripping any list/blockquote prefix.
894    ///
895    /// This is useful for processing table rows that may be inside list items or blockquotes.
896    /// The line_index indicates which line of the table this is (0 = header, 1 = delimiter, etc.)
897    pub fn extract_table_row_content<'a>(line: &'a str, table_block: &TableBlock, line_index: usize) -> &'a str {
898        // First strip blockquote prefix
899        let (_, after_blockquote) = Self::extract_blockquote_prefix(line);
900
901        // Then handle list prefix if present
902        if let Some(ref list_ctx) = table_block.list_context {
903            if line_index == 0 {
904                // Header line: strip list prefix (handles both markers and indentation)
905                after_blockquote
906                    .strip_prefix(&list_ctx.list_prefix)
907                    .unwrap_or_else(|| Self::extract_list_prefix(after_blockquote).1)
908            } else {
909                // Continuation lines: strip indentation
910                Self::strip_list_continuation_indent(after_blockquote, list_ctx.content_indent)
911            }
912        } else {
913            after_blockquote
914        }
915    }
916
917    /// Check if the content after a list marker looks like a table row.
918    /// This is used to detect tables that start on the same line as a list marker.
919    pub fn is_list_item_with_table_row(line: &str) -> bool {
920        let (prefix, content, _) = Self::extract_list_prefix(line);
921        if prefix.is_empty() {
922            return false;
923        }
924
925        // Check if the content after the list marker is a table row
926        // It must start with | (proper table format within a list)
927        let trimmed = content.trim();
928        if !trimmed.starts_with('|') {
929            return false;
930        }
931
932        // Use our table row detection on the content
933        Self::is_potential_table_row_content(content)
934    }
935
936    /// Internal helper: Check if content (without list/blockquote prefix) looks like a table row.
937    fn is_potential_table_row_content(content: &str) -> bool {
938        Self::is_potential_table_row(content)
939    }
940}
941
942#[cfg(test)]
943mod tests {
944    use super::*;
945    use crate::lint_context::LintContext;
946
947    #[test]
948    fn test_is_potential_table_row() {
949        // Basic valid table rows
950        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
951        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
952        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
953        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
954
955        // Multiple cells
956        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
957
958        // With whitespace
959        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
960        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
961
962        // Not table rows
963        assert!(!TableUtils::is_potential_table_row("- List item"));
964        assert!(!TableUtils::is_potential_table_row("* Another list"));
965        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
966        assert!(!TableUtils::is_potential_table_row("Regular text"));
967        assert!(!TableUtils::is_potential_table_row(""));
968        assert!(!TableUtils::is_potential_table_row("   "));
969
970        // Code blocks
971        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
972        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
973        assert!(!TableUtils::is_potential_table_row("Use ``a|b`` in prose"));
974        assert!(TableUtils::is_potential_table_row("| `fenced` | Uses ``` and ~~~ |"));
975        assert!(TableUtils::is_potential_table_row("`!foo && bar` | `(!foo) && bar`"));
976        assert!(!TableUtils::is_potential_table_row("`echo a | sed 's/a/b/'`"));
977
978        // Single pipe not enough
979        assert!(!TableUtils::is_potential_table_row("Just one |"));
980        assert!(!TableUtils::is_potential_table_row("| Just one"));
981
982        // Very long cells are valid in tables (no length limit for cell content)
983        let long_cell = "a".repeat(150);
984        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
985
986        // Cells with newlines
987        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
988
989        // Empty cells (Issue #129)
990        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
991        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
992        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
993    }
994
995    #[test]
996    fn test_list_items_with_pipes_not_table_rows() {
997        // Ordered list items should NOT be detected as table rows
998        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
999        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
1000        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
1001        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
1002        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
1003
1004        // Unordered list items with tabs
1005        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
1006        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
1007        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
1008
1009        // Indented list items (the trim_start normalizes indentation)
1010        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
1011        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
1012        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
1013
1014        // Task list items
1015        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
1016        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
1017
1018        // Multiple pipes in list items
1019        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
1020        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
1021
1022        // These SHOULD still be detected as potential table rows
1023        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
1024        assert!(TableUtils::is_potential_table_row("cell | cell"));
1025        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
1026    }
1027
1028    #[test]
1029    fn test_atx_headings_with_pipes_not_table_rows() {
1030        // All 6 ATX heading levels with pipes
1031        assert!(!TableUtils::is_potential_table_row("# Heading | with pipe"));
1032        assert!(!TableUtils::is_potential_table_row("## Heading | with pipe"));
1033        assert!(!TableUtils::is_potential_table_row("### Heading | with pipe"));
1034        assert!(!TableUtils::is_potential_table_row("#### Heading | with pipe"));
1035        assert!(!TableUtils::is_potential_table_row("##### Heading | with pipe"));
1036        assert!(!TableUtils::is_potential_table_row("###### Heading | with pipe"));
1037
1038        // Multiple pipes in headings
1039        assert!(!TableUtils::is_potential_table_row("### col1 | col2 | col3"));
1040        assert!(!TableUtils::is_potential_table_row("## a|b|c"));
1041
1042        // Headings with tab after hashes
1043        assert!(!TableUtils::is_potential_table_row("#\tHeading | pipe"));
1044        assert!(!TableUtils::is_potential_table_row("##\tHeading | pipe"));
1045
1046        // Heading with only hashes and pipe (empty heading text)
1047        assert!(!TableUtils::is_potential_table_row("# |"));
1048        assert!(!TableUtils::is_potential_table_row("## |"));
1049
1050        // Indented headings (spaces before #)
1051        assert!(!TableUtils::is_potential_table_row("  ## Heading | pipe"));
1052        assert!(!TableUtils::is_potential_table_row("   ### Heading | pipe"));
1053
1054        // Unicode content in headings (the original proptest failure case)
1055        assert!(!TableUtils::is_potential_table_row("#### ®aAA|ᯗ"));
1056
1057        // 7+ hashes are NOT headings — should follow normal table detection
1058        // "####### text|pipe" has no space after 7 hashes if treated as non-heading
1059        // but with a space it still has 7+ hashes so not a heading
1060        assert!(TableUtils::is_potential_table_row("####### text | pipe"));
1061
1062        // Hash without space is NOT a heading, so pipe detection applies
1063        assert!(TableUtils::is_potential_table_row("#nospc|pipe"));
1064
1065        // These SHOULD still be detected as potential table rows
1066        assert!(TableUtils::is_potential_table_row("| # Header | Value |"));
1067        assert!(TableUtils::is_potential_table_row("text | #tag"));
1068    }
1069
1070    #[test]
1071    fn test_is_delimiter_row() {
1072        // Basic delimiter rows
1073        assert!(TableUtils::is_delimiter_row("|---|---|"));
1074        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
1075        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
1076        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
1077
1078        // With varying dash counts
1079        assert!(TableUtils::is_delimiter_row("|-|--|"));
1080        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
1081
1082        // With whitespace
1083        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
1084        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
1085
1086        // Multiple columns
1087        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
1088
1089        // Without leading/trailing pipes
1090        assert!(TableUtils::is_delimiter_row("--- | ---"));
1091        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
1092
1093        // Not delimiter rows
1094        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
1095        assert!(!TableUtils::is_delimiter_row("Regular text"));
1096        assert!(!TableUtils::is_delimiter_row(""));
1097        assert!(!TableUtils::is_delimiter_row("|||"));
1098        assert!(!TableUtils::is_delimiter_row("| | |"));
1099
1100        // Must have dashes
1101        assert!(!TableUtils::is_delimiter_row("| : | : |"));
1102        assert!(!TableUtils::is_delimiter_row("|    |    |"));
1103
1104        // Mixed content
1105        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
1106        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
1107    }
1108
1109    #[test]
1110    fn test_count_cells() {
1111        // Basic counts
1112        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
1113        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
1114        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
1115        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
1116
1117        // Single cell
1118        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
1119        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
1120
1121        // Empty cells
1122        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
1123        assert_eq!(TableUtils::count_cells("| | | |"), 3);
1124
1125        // Many cells
1126        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
1127
1128        // Edge cases
1129        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
1130        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
1131
1132        // No table
1133        assert_eq!(TableUtils::count_cells("Regular text"), 0);
1134        assert_eq!(TableUtils::count_cells(""), 0);
1135        assert_eq!(TableUtils::count_cells("   "), 0);
1136
1137        // Whitespace handling
1138        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
1139        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
1140    }
1141
1142    #[test]
1143    fn test_count_cells_with_escaped_pipes() {
1144        // In GFM tables, escape handling happens BEFORE cell splitting.
1145        // Inline code does NOT protect pipes - they still act as cell delimiters.
1146        // To include a literal pipe in a table cell, you MUST escape it with \|
1147
1148        // Basic table structure
1149        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
1150        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
1151        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
1152
1153        // Escaped pipes: \| keeps the pipe as content
1154        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
1155        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
1156
1157        // Escaped pipes inside backticks (correct way to include | in code in tables)
1158        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
1159
1160        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
1161        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
1162        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 3); // Same inside code
1163
1164        // IMPORTANT: Bare pipes in inline code DO act as delimiters (GFM behavior)
1165        // This matches GitHub's actual rendering where `a | b` splits into two cells
1166        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 3);
1167        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 4);
1168        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 2);
1169
1170        // The regex example from Issue #34 - pipes in regex patterns need escaping
1171        // Unescaped: `^([0-1]?\d|2[0-3])` has a bare | which splits cells
1172        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 3);
1173        // Escaped: `^([0-1]?\d\|2[0-3])` keeps the | as part of the regex
1174        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
1175    }
1176
1177    #[test]
1178    fn test_determine_pipe_style() {
1179        // All pipe styles
1180        assert_eq!(
1181            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
1182            Some("leading_and_trailing")
1183        );
1184        assert_eq!(
1185            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
1186            Some("leading_only")
1187        );
1188        assert_eq!(
1189            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
1190            Some("trailing_only")
1191        );
1192        assert_eq!(
1193            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
1194            Some("no_leading_or_trailing")
1195        );
1196
1197        // With whitespace
1198        assert_eq!(
1199            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
1200            Some("leading_and_trailing")
1201        );
1202        assert_eq!(
1203            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
1204            Some("leading_only")
1205        );
1206
1207        // No pipes
1208        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
1209        assert_eq!(TableUtils::determine_pipe_style(""), None);
1210        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1211
1212        // Single pipe cases
1213        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
1214        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
1215        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
1216    }
1217
1218    #[test]
1219    fn test_find_table_blocks_simple() {
1220        let content = "| Header 1 | Header 2 |
1221|-----------|-----------|
1222| Cell 1    | Cell 2    |
1223| Cell 3    | Cell 4    |";
1224
1225        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1226
1227        let tables = TableUtils::find_table_blocks(content, &ctx);
1228        assert_eq!(tables.len(), 1);
1229
1230        let table = &tables[0];
1231        assert_eq!(table.start_line, 0);
1232        assert_eq!(table.end_line, 3);
1233        assert_eq!(table.header_line, 0);
1234        assert_eq!(table.delimiter_line, 1);
1235        assert_eq!(table.content_lines, vec![2, 3]);
1236    }
1237
1238    #[test]
1239    fn test_find_table_blocks_multiple() {
1240        let content = "Some text
1241
1242| Table 1 | Col A |
1243|----------|-------|
1244| Data 1   | Val 1 |
1245
1246More text
1247
1248| Table 2 | Col 2 |
1249|----------|-------|
1250| Data 2   | Data  |";
1251
1252        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1253
1254        let tables = TableUtils::find_table_blocks(content, &ctx);
1255        assert_eq!(tables.len(), 2);
1256
1257        // First table
1258        assert_eq!(tables[0].start_line, 2);
1259        assert_eq!(tables[0].end_line, 4);
1260        assert_eq!(tables[0].header_line, 2);
1261        assert_eq!(tables[0].delimiter_line, 3);
1262        assert_eq!(tables[0].content_lines, vec![4]);
1263
1264        // Second table
1265        assert_eq!(tables[1].start_line, 8);
1266        assert_eq!(tables[1].end_line, 10);
1267        assert_eq!(tables[1].header_line, 8);
1268        assert_eq!(tables[1].delimiter_line, 9);
1269        assert_eq!(tables[1].content_lines, vec![10]);
1270    }
1271
1272    #[test]
1273    fn test_find_table_blocks_no_content_rows() {
1274        let content = "| Header 1 | Header 2 |
1275|-----------|-----------|
1276
1277Next paragraph";
1278
1279        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1280
1281        let tables = TableUtils::find_table_blocks(content, &ctx);
1282        assert_eq!(tables.len(), 1);
1283
1284        let table = &tables[0];
1285        assert_eq!(table.start_line, 0);
1286        assert_eq!(table.end_line, 1); // Just header and delimiter
1287        assert_eq!(table.content_lines.len(), 0);
1288    }
1289
1290    #[test]
1291    fn test_find_table_blocks_in_code_block() {
1292        let content = "```
1293| Not | A | Table |
1294|-----|---|-------|
1295| In  | Code | Block |
1296```
1297
1298| Real | Table |
1299|------|-------|
1300| Data | Here  |";
1301
1302        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1303
1304        let tables = TableUtils::find_table_blocks(content, &ctx);
1305        assert_eq!(tables.len(), 1); // Only the table outside code block
1306
1307        let table = &tables[0];
1308        assert_eq!(table.header_line, 6);
1309        assert_eq!(table.delimiter_line, 7);
1310    }
1311
1312    #[test]
1313    fn test_find_table_blocks_no_tables() {
1314        let content = "Just regular text
1315No tables here
1316- List item with | pipe
1317* Another list item";
1318
1319        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1320
1321        let tables = TableUtils::find_table_blocks(content, &ctx);
1322        assert_eq!(tables.len(), 0);
1323    }
1324
1325    #[test]
1326    fn test_find_table_blocks_malformed() {
1327        let content = "| Header without delimiter |
1328| This looks like table |
1329But no delimiter row
1330
1331| Proper | Table |
1332|---------|-------|
1333| Data    | Here  |";
1334
1335        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1336
1337        let tables = TableUtils::find_table_blocks(content, &ctx);
1338        assert_eq!(tables.len(), 1); // Only the proper table
1339        assert_eq!(tables[0].header_line, 4);
1340    }
1341
1342    #[test]
1343    fn test_edge_cases() {
1344        // Test empty content
1345        assert!(!TableUtils::is_potential_table_row(""));
1346        assert!(!TableUtils::is_delimiter_row(""));
1347        assert_eq!(TableUtils::count_cells(""), 0);
1348        assert_eq!(TableUtils::determine_pipe_style(""), None);
1349
1350        // Test whitespace only
1351        assert!(!TableUtils::is_potential_table_row("   "));
1352        assert!(!TableUtils::is_delimiter_row("   "));
1353        assert_eq!(TableUtils::count_cells("   "), 0);
1354        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1355
1356        // Test single character
1357        assert!(!TableUtils::is_potential_table_row("|"));
1358        assert!(!TableUtils::is_delimiter_row("|"));
1359        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
1360
1361        // Test very long lines are valid table rows (no length limit)
1362        // Test both single-column and multi-column long lines
1363        let long_single = format!("| {} |", "a".repeat(200));
1364        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
1365
1366        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
1367        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
1368
1369        // Test unicode
1370        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
1371        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
1372        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
1373    }
1374
1375    #[test]
1376    fn test_table_block_struct() {
1377        let block = TableBlock {
1378            start_line: 0,
1379            end_line: 5,
1380            header_line: 0,
1381            delimiter_line: 1,
1382            content_lines: vec![2, 3, 4, 5],
1383            list_context: None,
1384        };
1385
1386        // Test Debug trait
1387        let debug_str = format!("{block:?}");
1388        assert!(debug_str.contains("TableBlock"));
1389        assert!(debug_str.contains("start_line: 0"));
1390
1391        // Test Clone trait
1392        let cloned = block.clone();
1393        assert_eq!(cloned.start_line, block.start_line);
1394        assert_eq!(cloned.end_line, block.end_line);
1395        assert_eq!(cloned.header_line, block.header_line);
1396        assert_eq!(cloned.delimiter_line, block.delimiter_line);
1397        assert_eq!(cloned.content_lines, block.content_lines);
1398        assert!(cloned.list_context.is_none());
1399    }
1400
1401    #[test]
1402    fn test_split_table_row() {
1403        // Basic split
1404        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
1405        assert_eq!(cells.len(), 3);
1406        assert_eq!(cells[0].trim(), "Cell 1");
1407        assert_eq!(cells[1].trim(), "Cell 2");
1408        assert_eq!(cells[2].trim(), "Cell 3");
1409
1410        // Without trailing pipe
1411        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
1412        assert_eq!(cells.len(), 2);
1413
1414        // Empty cells
1415        let cells = TableUtils::split_table_row("| | | |");
1416        assert_eq!(cells.len(), 3);
1417
1418        // Single cell
1419        let cells = TableUtils::split_table_row("| Cell |");
1420        assert_eq!(cells.len(), 1);
1421        assert_eq!(cells[0].trim(), "Cell");
1422
1423        // No pipes
1424        let cells = TableUtils::split_table_row("No pipes here");
1425        assert_eq!(cells.len(), 0);
1426    }
1427
1428    #[test]
1429    fn test_split_table_row_with_escaped_pipes() {
1430        // Escaped pipes should be preserved in cell content
1431        let cells = TableUtils::split_table_row(r"| A | B \| C |");
1432        assert_eq!(cells.len(), 2);
1433        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
1434
1435        // Double backslash + pipe is NOT escaped
1436        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
1437        assert_eq!(cells.len(), 3);
1438    }
1439
1440    #[test]
1441    fn test_split_table_row_with_flavor_mkdocs() {
1442        // MkDocs flavor: pipes in inline code are NOT cell delimiters
1443        let cells =
1444            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
1445        assert_eq!(cells.len(), 2);
1446        assert!(
1447            cells[1].contains("`x | y`"),
1448            "Inline code with pipe should be single cell in MkDocs flavor"
1449        );
1450
1451        // Multiple pipes in inline code
1452        let cells =
1453            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
1454        assert_eq!(cells.len(), 2);
1455        assert!(cells[1].contains("`a | b | c`"));
1456    }
1457
1458    #[test]
1459    fn test_split_table_row_with_flavor_standard() {
1460        // Standard/GFM flavor: pipes in inline code ARE cell delimiters
1461        let cells =
1462            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
1463        // In GFM, `x | y` splits into separate cells
1464        assert_eq!(cells.len(), 3);
1465    }
1466
1467    // === extract_blockquote_prefix tests ===
1468
1469    #[test]
1470    fn test_extract_blockquote_prefix_no_blockquote() {
1471        // Regular table row without blockquote
1472        let (prefix, content) = TableUtils::extract_blockquote_prefix("| H1 | H2 |");
1473        assert_eq!(prefix, "");
1474        assert_eq!(content, "| H1 | H2 |");
1475    }
1476
#[test]
fn test_extract_blockquote_prefix_single_level() {
    // One `>` marker followed by a space is reported as the prefix.
    let line = "> | H1 | H2 |";
    let (quote_prefix, remainder) = TableUtils::extract_blockquote_prefix(line);
    assert_eq!(quote_prefix, "> ");
    assert_eq!(remainder, "| H1 | H2 |");
}
1484
#[test]
fn test_extract_blockquote_prefix_double_level() {
    // Two adjacent `>` markers plus the trailing space form the prefix.
    let line = ">> | H1 | H2 |";
    let (quote_prefix, remainder) = TableUtils::extract_blockquote_prefix(line);
    assert_eq!(quote_prefix, ">> ");
    assert_eq!(remainder, "| H1 | H2 |");
}
1492
#[test]
fn test_extract_blockquote_prefix_triple_level() {
    // Three adjacent `>` markers plus the trailing space form the prefix.
    let line = ">>> | H1 | H2 |";
    let (quote_prefix, remainder) = TableUtils::extract_blockquote_prefix(line);
    assert_eq!(quote_prefix, ">>> ");
    assert_eq!(remainder, "| H1 | H2 |");
}
1500
#[test]
fn test_extract_blockquote_prefix_with_spaces() {
    // Nested markers separated by a space (`> > `) are kept verbatim in the prefix.
    let line = "> > | H1 | H2 |";
    let (quote_prefix, remainder) = TableUtils::extract_blockquote_prefix(line);
    assert_eq!(quote_prefix, "> > ");
    assert_eq!(remainder, "| H1 | H2 |");
}
1508
#[test]
fn test_extract_blockquote_prefix_indented() {
    // Leading indentation before the `>` marker is included in the prefix.
    let line = "  > | H1 | H2 |";
    let (quote_prefix, remainder) = TableUtils::extract_blockquote_prefix(line);
    assert_eq!(quote_prefix, "  > ");
    assert_eq!(remainder, "| H1 | H2 |");
}
1516
#[test]
fn test_extract_blockquote_prefix_no_space_after() {
    // A marker directly followed by the row (no space) yields a bare `>` prefix.
    let line = ">| H1 | H2 |";
    let (quote_prefix, remainder) = TableUtils::extract_blockquote_prefix(line);
    assert_eq!(quote_prefix, ">");
    assert_eq!(remainder, "| H1 | H2 |");
}
1524
#[test]
fn test_determine_pipe_style_in_blockquote() {
    // Each case pairs a blockquoted row with the pipe style expected for it,
    // covering one to three levels of `>` nesting. Cases are checked in order.
    let cases = [
        ("> | H1 | H2 |", "leading_and_trailing"),
        ("> H1 | H2", "no_leading_or_trailing"),
        (">> | H1 | H2 |", "leading_and_trailing"),
        (">>> | H1 | H2", "leading_only"),
    ];

    for &(row, expected_style) in &cases {
        assert_eq!(TableUtils::determine_pipe_style(row), Some(expected_style));
    }
}
1542
#[test]
fn test_list_table_delimiter_requires_indentation() {
    // The list item text contains a pipe, but the delimiter row sits at column 1
    // with no indentation. That must not count as a table nested in the list item:
    // a table is still reported (the list item line serves as its header), yet it
    // carries no list context.
    let markdown = "- List item with | pipe\n|---|---|\n| Cell 1 | Cell 2 |";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let blocks = TableUtils::find_table_blocks(markdown, &lint_ctx);

    assert_eq!(blocks.len(), 1, "Should find exactly one table");

    // Without list context the block is not handled as a list-table when
    // columns are counted.
    let list_context = &blocks[0].list_context;
    assert!(
        list_context.is_none(),
        "Should NOT have list context since delimiter has no indentation"
    );
}
1561
#[test]
fn test_list_table_with_properly_indented_delimiter() {
    // The header row lives on the list item line and the delimiter/body rows are
    // indented to line up with the item content, so this is a genuine list-table.
    let markdown = "- | Header 1 | Header 2 |\n  |----------|----------|\n  | Cell 1   | Cell 2   |";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let blocks = TableUtils::find_table_blocks(markdown, &lint_ctx);

    // Exactly one table, anchored at the list item line and flagged with list context.
    assert_eq!(blocks.len(), 1, "Should find exactly one table");
    let table = &blocks[0];
    assert_eq!(table.start_line, 0, "Table should start at list item line");
    assert!(
        table.list_context.is_some(),
        "Should be a list table since delimiter is properly indented"
    );
}
1578}
rumdl_lib/utils/table_utils.rs

rumdl_lib/utils/
table_utils.rs