Skip to main content

rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).
5use super::blockquote::strip_blockquote_prefix;
6
/// Represents a table block in the document.
///
/// All line fields are zero-based indices into the sequence produced by
/// `content.lines()` in `TableUtils::find_table_blocks_with_code_info`.
#[derive(Debug, Clone)]
pub struct TableBlock {
    /// Index of the first line of the table (same as `header_line` when built by the detector).
    pub start_line: usize,
    /// Index of the last line that belongs to the table (inclusive). Equals
    /// `delimiter_line` when the table has no body rows.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row (e.g. `|---|---|`), directly after the header.
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in document order.
    pub content_lines: Vec<usize>,
    /// If the table is inside a list item, this contains:
    /// - The list marker prefix for the header line (e.g., "- ", "1. ")
    /// - The content indent (number of spaces for continuation lines)
    ///
    /// `None` for tables that are not part of a list item.
    pub list_context: Option<ListTableContext>,
}
20
/// Context information for tables inside list items
#[derive(Debug, Clone)]
pub struct ListTableContext {
    /// The list marker prefix including any leading whitespace (e.g., "- ", "  1. ").
    ///
    /// For a table that starts on a continuation line (no marker on the header
    /// line), the detector stores `content_indent` spaces here instead.
    pub list_prefix: String,
    /// Number of spaces for continuation lines to align with content
    pub content_indent: usize,
}
29
/// Shared table detection utilities.
///
/// Field-less namespace type: all functionality is exposed as associated
/// functions (no instance state).
pub struct TableUtils;
32
33impl TableUtils {
34    /// Returns true if the line has at least one unescaped pipe separator outside inline code and
35    /// math spans.
36    ///
37    /// Skips pipes inside backtick code spans (`` `...` ``) and dollar-sign math spans (`$...$`,
38    /// `$$...$$`) to avoid false positives from prose like `` `echo a | sed 's/a/b/'` `` or math
39    /// like `$|S|$` (absolute value notation).
40    ///
41    /// Note: a bare `$` that opens a span without a matching closing `$` keeps the scanner in
42    /// math mode for the rest of the line, suppressing any subsequent pipes. This is conservative
43    /// and means that `$5 | $10`-style price comparisons (without outer pipes) are not detected
44    /// as table separators — an accepted trade-off to avoid false positives from real math.
45    fn has_unescaped_pipe_outside_spans(text: &str) -> bool {
46        let chars: Vec<char> = text.chars().collect();
47        let mut i = 0;
48        let mut in_code = false;
49        let mut code_delim_len = 0usize;
50        let mut in_math = false;
51        let mut math_delim_len = 0usize;
52
53        while i < chars.len() {
54            let ch = chars[i];
55
56            if ch == '\\' && !in_code && !in_math {
57                // Skip escaped character (only outside code and math spans —
58                // backslashes are literal inside code spans per CommonMark).
59                i += if i + 1 < chars.len() { 2 } else { 1 };
60                continue;
61            }
62
63            if ch == '`' && !in_math {
64                let mut run = 1usize;
65                while i + run < chars.len() && chars[i + run] == '`' {
66                    run += 1;
67                }
68
69                if in_code {
70                    if run == code_delim_len {
71                        in_code = false;
72                        code_delim_len = 0;
73                    }
74                    // Mismatched backtick run inside a code span: consumed but span stays open.
75                } else {
76                    in_code = true;
77                    code_delim_len = run;
78                }
79
80                i += run;
81                continue;
82            }
83
84            if ch == '$' && !in_code {
85                let mut run = 1usize;
86                while i + run < chars.len() && chars[i + run] == '$' {
87                    run += 1;
88                }
89
90                if in_math {
91                    if run == math_delim_len {
92                        in_math = false;
93                        math_delim_len = 0;
94                    }
95                    // Mismatched $-run inside a math span: consumed but span stays open.
96                } else {
97                    in_math = true;
98                    math_delim_len = run;
99                }
100
101                i += run;
102                continue;
103            }
104
105            if ch == '|' && !in_code && !in_math {
106                return true;
107            }
108
109            i += 1;
110        }
111
112        false
113    }
114
115    /// Check if a line looks like a potential table row
116    pub fn is_potential_table_row(line: &str) -> bool {
117        let trimmed = line.trim();
118        if trimmed.is_empty() || !trimmed.contains('|') {
119            return false;
120        }
121
122        // Skip lines that are clearly not table rows
123        // Unordered list items with space or tab after marker
124        if trimmed.starts_with("- ")
125            || trimmed.starts_with("* ")
126            || trimmed.starts_with("+ ")
127            || trimmed.starts_with("-\t")
128            || trimmed.starts_with("*\t")
129            || trimmed.starts_with("+\t")
130        {
131            return false;
132        }
133
134        // Skip ordered list items: digits followed by . or ) then space/tab
135        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
136            && first_non_digit > 0
137        {
138            let after_digits = &trimmed[first_non_digit..];
139            if after_digits.starts_with(". ")
140                || after_digits.starts_with(".\t")
141                || after_digits.starts_with(") ")
142                || after_digits.starts_with(")\t")
143            {
144                return false;
145            }
146        }
147
148        // Skip ATX headings (# through ######)
149        if trimmed.starts_with('#') {
150            let hash_count = trimmed.bytes().take_while(|&b| b == b'#').count();
151            if hash_count <= 6 {
152                let after_hashes = &trimmed[hash_count..];
153                if after_hashes.is_empty() || after_hashes.starts_with(' ') || after_hashes.starts_with('\t') {
154                    return false;
155                }
156            }
157        }
158
159        // For rows without explicit outer pipes, require a real separator outside
160        // inline code and math spans to avoid prose/command false positives.
161        let has_outer_pipes = trimmed.starts_with('|') && trimmed.ends_with('|');
162        if !has_outer_pipes && !Self::has_unescaped_pipe_outside_spans(trimmed) {
163            return false;
164        }
165
166        // Must have at least 2 parts when split by |
167        let parts: Vec<&str> = trimmed.split('|').collect();
168        if parts.len() < 2 {
169            return false;
170        }
171
172        // Check if it looks like a table row by having reasonable content between pipes
173        let mut valid_parts = 0;
174        let mut total_non_empty_parts = 0;
175
176        for part in &parts {
177            let part_trimmed = part.trim();
178            // Skip empty parts (from leading/trailing pipes)
179            if part_trimmed.is_empty() {
180                continue;
181            }
182            total_non_empty_parts += 1;
183
184            // Count parts that look like table cells (reasonable content, no newlines)
185            if !part_trimmed.contains('\n') {
186                valid_parts += 1;
187            }
188        }
189
190        // Check if all non-empty parts are valid (no newlines)
191        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
192            // Some cells contain newlines, not a valid table row
193            return false;
194        }
195
196        // GFM allows tables with all empty cells (e.g., |||)
197        // These are valid if they have proper table formatting (leading and trailing pipes)
198        if total_non_empty_parts == 0 {
199            // Empty cells are only valid with proper pipe formatting
200            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
201        }
202
203        // GFM allows single-column tables, so >= 1 valid part is enough
204        // when the line has proper table formatting (pipes)
205        if trimmed.starts_with('|') && trimmed.ends_with('|') {
206            // Properly formatted table row with pipes on both ends
207            valid_parts >= 1
208        } else {
209            // For rows without proper pipe formatting, require at least 2 cells
210            valid_parts >= 2
211        }
212    }
213
214    /// Check if a line is a table delimiter row (e.g., |---|---|)
215    pub fn is_delimiter_row(line: &str) -> bool {
216        let trimmed = line.trim();
217        if !trimmed.contains('|') || !trimmed.contains('-') {
218            return false;
219        }
220
221        // Split by pipes and check each part
222        let parts: Vec<&str> = trimmed.split('|').collect();
223        let mut valid_delimiter_parts = 0;
224        let mut total_non_empty_parts = 0;
225
226        for part in &parts {
227            let part_trimmed = part.trim();
228            if part_trimmed.is_empty() {
229                continue; // Skip empty parts from leading/trailing pipes
230            }
231
232            total_non_empty_parts += 1;
233
234            // Check if this part looks like a delimiter (contains dashes and optionally colons)
235            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
236                valid_delimiter_parts += 1;
237            }
238        }
239
240        // All non-empty parts must be valid delimiters, and there must be at least one
241        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
242    }
243
244    /// Find all table blocks in the content with optimized detection
245    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
246    pub fn find_table_blocks_with_code_info(
247        content: &str,
248        code_blocks: &[(usize, usize)],
249        code_spans: &[crate::lint_context::CodeSpan],
250        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
251    ) -> Vec<TableBlock> {
252        let lines: Vec<&str> = content.lines().collect();
253        let mut tables = Vec::new();
254        let mut i = 0;
255
256        // Pre-compute line positions for efficient code block checking
257        let mut line_positions = Vec::with_capacity(lines.len());
258        let mut pos = 0;
259        for line in &lines {
260            line_positions.push(pos);
261            pos += line.len() + 1; // +1 for newline
262        }
263
264        // Stack of active list content indents for continuation table tracking.
265        // Supports nested lists: when a child list is seen, we push; when we
266        // dedent past a level, we pop back to the enclosing list.
267        let mut list_indent_stack: Vec<usize> = Vec::new();
268
269        while i < lines.len() {
270            // Skip lines in code blocks, code spans, or HTML comments
271            let line_start = line_positions[i];
272            let in_code =
273                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start) || {
274                    // Binary search on sorted code spans
275                    let idx = code_spans.partition_point(|span| span.byte_offset <= line_start);
276                    idx > 0 && line_start < code_spans[idx - 1].byte_end
277                };
278            let in_html_comment = {
279                // Binary search on sorted HTML comment ranges
280                let idx = html_comment_ranges.partition_point(|range| range.start <= line_start);
281                idx > 0 && line_start < html_comment_ranges[idx - 1].end
282            };
283
284            if in_code || in_html_comment {
285                i += 1;
286                continue;
287            }
288
289            // Strip blockquote prefix for table detection
290            let line_content = strip_blockquote_prefix(lines[i]);
291
292            // Update active list tracking
293            let (list_prefix, list_content, content_indent) = Self::extract_list_prefix(line_content);
294            if !list_prefix.is_empty() {
295                // Line has a list marker. Pop any deeper/equal levels, then push this one.
296                while list_indent_stack.last().is_some_and(|&top| top >= content_indent) {
297                    list_indent_stack.pop();
298                }
299                list_indent_stack.push(content_indent);
300            } else if !line_content.trim().is_empty() {
301                // Non-blank line without a marker: pop any levels we've dedented past
302                let leading = line_content.len() - line_content.trim_start().len();
303                while list_indent_stack.last().is_some_and(|&top| leading < top) {
304                    list_indent_stack.pop();
305                }
306            }
307            // Blank lines keep the stack unchanged (blank lines don't end list items)
308
309            // Check if this is a list item that contains a table row on the same line,
310            // or a continuation table indented under an active list item
311            let (is_same_line_list_table, effective_content) =
312                if !list_prefix.is_empty() && Self::is_potential_table_row_content(list_content) {
313                    (true, list_content)
314                } else {
315                    (false, line_content)
316                };
317
318            // Detect continuation list tables: no marker on this line, but indented
319            // under an active list item (e.g., "- Text\n  | h1 | h2 |")
320            let continuation_indent = if !is_same_line_list_table && list_prefix.is_empty() {
321                let leading = line_content.len() - line_content.trim_start().len();
322                // Find the deepest list level this line is indented under
323                list_indent_stack
324                    .iter()
325                    .rev()
326                    .find(|&&indent| leading >= indent)
327                    .copied()
328            } else {
329                None
330            };
331
332            let is_continuation_list_table = continuation_indent.is_some()
333                && {
334                    let indent = continuation_indent.unwrap();
335                    let leading = line_content.len() - line_content.trim_start().len();
336                    // Per CommonMark, 4+ spaces beyond content indent is a code block
337                    leading < indent + 4
338                }
339                && Self::is_potential_table_row(effective_content);
340
341            let is_any_list_table = is_same_line_list_table || is_continuation_list_table;
342
343            // For continuation list tables, use the matched list indent
344            let effective_content_indent = if is_same_line_list_table {
345                content_indent
346            } else if is_continuation_list_table {
347                continuation_indent.unwrap()
348            } else {
349                0
350            };
351
352            // Look for potential table start
353            if is_any_list_table || Self::is_potential_table_row(effective_content) {
354                // For list tables (same-line or continuation), check indented continuation lines
355                // For regular tables, check the next line directly
356                let (next_line_content, delimiter_has_valid_indent) = if i + 1 < lines.len() {
357                    let next_raw = strip_blockquote_prefix(lines[i + 1]);
358                    if is_any_list_table {
359                        // Verify the delimiter line has proper indentation
360                        let leading_spaces = next_raw.len() - next_raw.trim_start().len();
361                        if leading_spaces >= effective_content_indent {
362                            // Has proper indentation, strip it and check as delimiter
363                            (
364                                Self::strip_list_continuation_indent(next_raw, effective_content_indent),
365                                true,
366                            )
367                        } else {
368                            // Not enough indentation - not a list table
369                            (next_raw, false)
370                        }
371                    } else {
372                        (next_raw, true)
373                    }
374                } else {
375                    ("", true)
376                };
377
378                // For list tables, only accept if delimiter has valid indentation
379                let effective_is_list_table = is_any_list_table && delimiter_has_valid_indent;
380
381                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
382                    // Found a table! Find its end
383                    let table_start = i;
384                    let header_line = i;
385                    let delimiter_line = i + 1;
386                    let mut table_end = i + 1; // Include the delimiter row
387                    let mut content_lines = Vec::new();
388
389                    // Continue while we have table rows
390                    let mut j = i + 2;
391                    while j < lines.len() {
392                        let line = lines[j];
393                        // Strip blockquote prefix for checking
394                        let raw_content = strip_blockquote_prefix(line);
395
396                        // For list tables, strip expected indentation
397                        let line_content = if effective_is_list_table {
398                            Self::strip_list_continuation_indent(raw_content, effective_content_indent)
399                        } else {
400                            raw_content
401                        };
402
403                        if line_content.trim().is_empty() {
404                            // Empty line ends the table
405                            break;
406                        }
407
408                        // For list tables, the continuation line must have proper indentation
409                        if effective_is_list_table {
410                            let leading_spaces = raw_content.len() - raw_content.trim_start().len();
411                            if leading_spaces < effective_content_indent {
412                                // Not enough indentation - end of table
413                                break;
414                            }
415                        }
416
417                        if Self::is_potential_table_row(line_content) {
418                            content_lines.push(j);
419                            table_end = j;
420                            j += 1;
421                        } else {
422                            // Non-table line ends the table
423                            break;
424                        }
425                    }
426
427                    let list_context = if effective_is_list_table {
428                        if is_same_line_list_table {
429                            // Same-line: prefix is the actual list marker (e.g., "- ")
430                            Some(ListTableContext {
431                                list_prefix: list_prefix.to_string(),
432                                content_indent: effective_content_indent,
433                            })
434                        } else {
435                            // Continuation: prefix is the indentation spaces
436                            Some(ListTableContext {
437                                list_prefix: " ".repeat(effective_content_indent),
438                                content_indent: effective_content_indent,
439                            })
440                        }
441                    } else {
442                        None
443                    };
444
445                    tables.push(TableBlock {
446                        start_line: table_start,
447                        end_line: table_end,
448                        header_line,
449                        delimiter_line,
450                        content_lines,
451                        list_context,
452                    });
453                    i = table_end + 1;
454                } else {
455                    i += 1;
456                }
457            } else {
458                i += 1;
459            }
460        }
461
462        tables
463    }
464
465    /// Strip list continuation indentation from a line.
466    /// For lines that are continuations of a list item's content, strip the expected indent.
467    fn strip_list_continuation_indent(line: &str, expected_indent: usize) -> &str {
468        let bytes = line.as_bytes();
469        let mut spaces = 0;
470
471        for &b in bytes {
472            if b == b' ' {
473                spaces += 1;
474            } else if b == b'\t' {
475                // Tab counts as up to 4 spaces, rounding up to next multiple of 4
476                spaces = (spaces / 4 + 1) * 4;
477            } else {
478                break;
479            }
480
481            if spaces >= expected_indent {
482                break;
483            }
484        }
485
486        // Strip at most expected_indent characters
487        let strip_count = spaces.min(expected_indent).min(line.len());
488        // Count actual bytes to strip (handling tabs)
489        let mut byte_count = 0;
490        let mut counted_spaces = 0;
491        for &b in bytes {
492            if counted_spaces >= strip_count {
493                break;
494            }
495            if b == b' ' {
496                counted_spaces += 1;
497                byte_count += 1;
498            } else if b == b'\t' {
499                counted_spaces = (counted_spaces / 4 + 1) * 4;
500                byte_count += 1;
501            } else {
502                break;
503            }
504        }
505
506        &line[byte_count..]
507    }
508
509    /// Find all table blocks in the content with optimized detection
510    /// This is a backward-compatible wrapper that accepts LintContext
511    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
512        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
513    }
514
515    /// Count the number of cells in a table row
516    pub fn count_cells(row: &str) -> usize {
517        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
518    }
519
520    /// Count the number of cells in a table row with flavor-specific behavior
521    ///
522    /// Pipes inside code spans are treated as content, not cell delimiters.
523    ///
524    /// This function strips blockquote prefixes before counting cells, so it works
525    /// correctly for tables inside blockquotes.
526    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
527        // Strip blockquote prefix if present before counting cells
528        let (_, content) = Self::extract_blockquote_prefix(row);
529        Self::split_table_row_with_flavor(content, flavor).len()
530    }
531
532    /// Count the number of consecutive backslashes immediately preceding `pos` in `chars`.
533    fn count_preceding_backslashes(chars: &[char], pos: usize) -> usize {
534        let mut count = 0;
535        let mut k = pos;
536        while k > 0 {
537            k -= 1;
538            if chars[k] == '\\' {
539                count += 1;
540            } else {
541                break;
542            }
543        }
544        count
545    }
546
547    /// Mask pipes inside inline code blocks with a placeholder character.
548    ///
549    /// Backticks preceded by an odd number of backslashes are escaped (literal text)
550    /// and do not open or close code spans. An even number of backslashes means the
551    /// backslashes themselves are escaped, so the backtick is a real delimiter.
552    pub fn mask_pipes_in_inline_code(text: &str) -> String {
553        let mut result = String::new();
554        let chars: Vec<char> = text.chars().collect();
555        let mut i = 0;
556
557        while i < chars.len() {
558            if chars[i] == '`' {
559                // A backtick preceded by an odd number of backslashes is escaped
560                let preceding = Self::count_preceding_backslashes(&chars, i);
561                if preceding % 2 != 0 {
562                    // Escaped backtick -- treat as literal text, not a code span opener
563                    result.push(chars[i]);
564                    i += 1;
565                    continue;
566                }
567
568                // Count consecutive backticks at start
569                let start = i;
570                let mut backtick_count = 0;
571                while i < chars.len() && chars[i] == '`' {
572                    backtick_count += 1;
573                    i += 1;
574                }
575
576                // Look for matching closing backticks
577                let mut found_closing = false;
578                let mut j = i;
579
580                while j < chars.len() {
581                    if chars[j] == '`' {
582                        // Per CommonMark spec, backslash escapes do NOT work inside code
583                        // spans -- all characters including backslashes are literal. So we
584                        // do NOT check count_preceding_backslashes here (only for the
585                        // opening backtick above).
586
587                        // Count potential closing backticks
588                        let close_start = j;
589                        let mut close_count = 0;
590                        while j < chars.len() && chars[j] == '`' {
591                            close_count += 1;
592                            j += 1;
593                        }
594
595                        if close_count == backtick_count {
596                            // Found matching closing backticks
597                            found_closing = true;
598
599                            // Valid inline code - add with pipes masked
600                            result.extend(chars[start..i].iter());
601
602                            for &ch in chars.iter().take(close_start).skip(i) {
603                                if ch == '|' {
604                                    result.push('_'); // Mask pipe with underscore
605                                } else {
606                                    result.push(ch);
607                                }
608                            }
609
610                            result.extend(chars[close_start..j].iter());
611                            i = j;
612                            break;
613                        }
614                        // If not matching, continue searching (j is already past these backticks)
615                    } else {
616                        j += 1;
617                    }
618                }
619
620                if !found_closing {
621                    // No matching closing found, treat as regular text
622                    result.extend(chars[start..i].iter());
623                }
624            } else {
625                result.push(chars[i]);
626                i += 1;
627            }
628        }
629
630        result
631    }
632
633    /// Mask escaped pipes for accurate table cell parsing
634    ///
635    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
636    /// - `\|` → escaped pipe → masked (stays as cell content)
637    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
638    ///
639    /// This function only handles escaped pipes. Pipes inside inline code spans
640    /// are handled separately by `mask_pipes_in_inline_code`.
641    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
642        let mut result = String::new();
643        let chars: Vec<char> = text.chars().collect();
644        let mut i = 0;
645
646        while i < chars.len() {
647            if chars[i] == '\\' {
648                if i + 1 < chars.len() && chars[i + 1] == '\\' {
649                    // Escaped backslash: \\ → push both and continue
650                    // The next character (if it's a pipe) will be a real delimiter
651                    result.push('\\');
652                    result.push('\\');
653                    i += 2;
654                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
655                    // Escaped pipe: \| → mask the pipe
656                    result.push('\\');
657                    result.push('_'); // Mask the pipe
658                    i += 2;
659                } else {
660                    // Single backslash not followed by \ or | → just push it
661                    result.push(chars[i]);
662                    i += 1;
663                }
664            } else {
665                result.push(chars[i]);
666                i += 1;
667            }
668        }
669
670        result
671    }
672
673    /// Split a table row into individual cell contents with flavor-specific behavior.
674    ///
675    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
676    /// This is the foundation for both cell counting and cell content extraction.
677    ///
678    /// Pipes inside code spans are treated as content, not cell delimiters.
679    pub fn split_table_row_with_flavor(row: &str, _flavor: crate::config::MarkdownFlavor) -> Vec<String> {
680        let trimmed = row.trim();
681
682        if !trimmed.contains('|') {
683            return Vec::new();
684        }
685
686        // First, mask escaped pipes (same for all flavors)
687        let masked = Self::mask_pipes_for_table_parsing(trimmed);
688
689        // Mask pipes inside inline code for all flavors
690        let final_masked = Self::mask_pipes_in_inline_code(&masked);
691
692        let has_leading = final_masked.starts_with('|');
693        let has_trailing = final_masked.ends_with('|');
694
695        let mut masked_content = final_masked.as_str();
696        let mut orig_content = trimmed;
697
698        if has_leading {
699            masked_content = &masked_content[1..];
700            orig_content = &orig_content[1..];
701        }
702
703        // Track whether we actually strip a trailing pipe
704        let stripped_trailing = has_trailing && !masked_content.is_empty();
705        if stripped_trailing {
706            masked_content = &masked_content[..masked_content.len() - 1];
707            orig_content = &orig_content[..orig_content.len() - 1];
708        }
709
710        // Handle edge cases for degenerate inputs
711        if masked_content.is_empty() {
712            if stripped_trailing {
713                // "||" case: two pipes with empty content between = one empty cell
714                return vec![String::new()];
715            } else {
716                // "|" case: single pipe, not a valid table row
717                return Vec::new();
718            }
719        }
720
721        let masked_parts: Vec<&str> = masked_content.split('|').collect();
722        let mut cells = Vec::new();
723        let mut pos = 0;
724
725        for masked_cell in masked_parts {
726            let cell_len = masked_cell.len();
727            let orig_cell = if pos + cell_len <= orig_content.len() {
728                &orig_content[pos..pos + cell_len]
729            } else {
730                masked_cell
731            };
732            cells.push(orig_cell.to_string());
733            pos += cell_len + 1; // +1 for the pipe delimiter
734        }
735
736        cells
737    }
738
739    /// Split a table row into individual cell contents using Standard/GFM behavior.
740    pub fn split_table_row(row: &str) -> Vec<String> {
741        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
742    }
743
744    /// Determine the pipe style of a table row
745    ///
746    /// Handles tables inside blockquotes by stripping the blockquote prefix
747    /// before analyzing the pipe style.
748    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
749        // Strip blockquote prefix if present before analyzing pipe style
750        let content = strip_blockquote_prefix(line);
751        let trimmed = content.trim();
752        if !trimmed.contains('|') {
753            return None;
754        }
755
756        let has_leading = trimmed.starts_with('|');
757        let has_trailing = trimmed.ends_with('|');
758
759        match (has_leading, has_trailing) {
760            (true, true) => Some("leading_and_trailing"),
761            (true, false) => Some("leading_only"),
762            (false, true) => Some("trailing_only"),
763            (false, false) => Some("no_leading_or_trailing"),
764        }
765    }
766
/// Splits a line into its blockquote prefix and the remaining content.
///
/// Handy when the prefix must be removed for processing and put back afterwards.
/// For example, `"> | H1 | H2 |"` yields `("> ", "| H1 | H2 |")`.
///
/// The prefix covers any leading spaces/tabs, every `>` marker, and all spaces/tabs
/// mixed in with or following the markers. When the first non-blank byte is not `>`,
/// the prefix is empty and the whole line is returned as content.
pub fn extract_blockquote_prefix(line: &str) -> (&str, &str) {
    let is_blank = |b: u8| b == b' ' || b == b'\t';
    let bytes = line.as_bytes();

    // Indentation in front of the first blockquote marker.
    let indent = bytes.iter().take_while(|&&b| is_blank(b)).count();

    // No marker after the indentation: this is not a blockquote line.
    if bytes.get(indent) != Some(&b'>') {
        return ("", line);
    }

    // Markers and the blanks around them; content begins at the first other byte.
    // Only ASCII bytes are consumed, so the split point is always a char boundary.
    let marker_run = bytes[indent..]
        .iter()
        .take_while(|&&b| b == b'>' || is_blank(b))
        .count();

    line.split_at(indent + marker_run)
}
804
/// Splits a line into its list-marker prefix, the content after it, and the content indent.
///
/// Recognizes unordered markers (`-`, `*`, `+`) and ordered markers (digits followed by
/// `.` or `)`). The marker must be followed by a space, a tab, or the end of the line.
///
/// Returns `(prefix, content, content_indent)`:
/// - `prefix`: leading whitespace, the marker, and at most one following space/tab
///   (e.g. `"- "`, `"  1. "`)
/// - `content`: everything after the prefix
/// - `content_indent`: the byte length of `prefix`, i.e. the column at which content starts
///
/// Examples:
/// - `"- | H1 | H2 |"` gives `("- ", "| H1 | H2 |", 2)`
/// - `"1. | H1 | H2 |"` gives `("1. ", "| H1 | H2 |", 3)`
/// - `"  - table"` gives `("  - ", "table", 4)`
///
/// A line that does not start with a list marker gives `("", line, 0)`.
pub fn extract_list_prefix(line: &str) -> (&str, &str, usize) {
    let is_blank = |b: u8| matches!(b, b' ' | b'\t');
    let bytes = line.as_bytes();

    // Offset of the first non-blank byte (or the line length for blank lines).
    let indent = bytes
        .iter()
        .position(|&b| !is_blank(b))
        .unwrap_or(bytes.len());

    // Offset just past the marker itself, if the line starts with one.
    let marker_end = match bytes.get(indent) {
        Some(b'-' | b'*' | b'+') => Some(indent + 1),
        Some(first) if first.is_ascii_digit() => {
            let digit_count = bytes[indent..]
                .iter()
                .take_while(|b| b.is_ascii_digit())
                .count();
            let after_digits = indent + digit_count;
            match bytes.get(after_digits) {
                Some(b'.' | b')') => Some(after_digits + 1),
                _ => None,
            }
        }
        _ => None,
    };

    // The marker needs a blank (one of which joins the prefix) or the end of the line
    // right after it; anything else (e.g. "-word", "--", "1.x") is not a list item.
    let prefix_len = match marker_end {
        None => return ("", line, 0),
        Some(end) => match bytes.get(end) {
            None => end,
            Some(&next) if is_blank(next) => end + 1,
            Some(_) => return ("", line, 0),
        },
    };

    let (prefix, content) = line.split_at(prefix_len);
    (prefix, content, prefix_len)
}
873
874    /// Extract the table row content from a line, stripping any list/blockquote prefix.
875    ///
876    /// This is useful for processing table rows that may be inside list items or blockquotes.
877    /// The line_index indicates which line of the table this is (0 = header, 1 = delimiter, etc.)
878    pub fn extract_table_row_content<'a>(line: &'a str, table_block: &TableBlock, line_index: usize) -> &'a str {
879        // First strip blockquote prefix
880        let (_, after_blockquote) = Self::extract_blockquote_prefix(line);
881
882        // Then handle list prefix if present
883        if let Some(ref list_ctx) = table_block.list_context {
884            if line_index == 0 {
885                // Header line: strip list prefix (handles both markers and indentation)
886                after_blockquote
887                    .strip_prefix(&list_ctx.list_prefix)
888                    .unwrap_or_else(|| Self::extract_list_prefix(after_blockquote).1)
889            } else {
890                // Continuation lines: strip indentation
891                Self::strip_list_continuation_indent(after_blockquote, list_ctx.content_indent)
892            }
893        } else {
894            after_blockquote
895        }
896    }
897
898    /// Check if the content after a list marker looks like a table row.
899    /// This is used to detect tables that start on the same line as a list marker.
900    pub fn is_list_item_with_table_row(line: &str) -> bool {
901        let (prefix, content, _) = Self::extract_list_prefix(line);
902        if prefix.is_empty() {
903            return false;
904        }
905
906        // Check if the content after the list marker is a table row
907        // It must start with | (proper table format within a list)
908        let trimmed = content.trim();
909        if !trimmed.starts_with('|') {
910            return false;
911        }
912
913        // Use our table row detection on the content
914        Self::is_potential_table_row_content(content)
915    }
916
917    /// Internal helper: Check if content (without list/blockquote prefix) looks like a table row.
918    fn is_potential_table_row_content(content: &str) -> bool {
919        Self::is_potential_table_row(content)
920    }
921}
922
923#[cfg(test)]
924mod tests {
925    use super::*;
926    use crate::lint_context::LintContext;
927
#[test]
fn test_is_potential_table_row() {
    // Cell content has no length limit, so a very long cell must still be accepted.
    let long_row = format!("| {} | b |", "a".repeat(150));

    let accepted = [
        // Basic valid table rows
        "| Header 1 | Header 2 |",
        "| Cell 1 | Cell 2 |",
        "Cell 1 | Cell 2",
        "| Cell |", // Single-column tables are valid in GFM
        // Multiple cells
        "| A | B | C | D | E |",
        // Surrounding / inner whitespace
        "  | Indented | Table |  ",
        "| Spaces | Around |",
        // Code spans that sit inside cells, with real separators outside them
        "| `fenced` | Uses ``` and ~~~ |",
        "`!foo && bar` | `(!foo) && bar`",
        // A math pipe inside a cell with outer pipes is still a table row
        "| cell with $|S|$ math |",
        // A pipe after fully closed math spans is still detected
        "$a$ | $b$",
        "$f(x)$ and $g(x)$ | result",
        long_row.as_str(),
        // Empty cells (Issue #129)
        "|||",   // Two empty cells
        "||||",  // Three empty cells
        "| | |", // Two empty cells with spaces
    ];

    let rejected = [
        // Plain list items and prose
        "- List item",
        "* Another list",
        "+ Plus list",
        "Regular text",
        "",
        "   ",
        // Pipes that only occur inside inline code spans
        "`code with | pipe`",
        "``multiple | backticks``",
        "Use ``a|b`` in prose",
        "`echo a | sed 's/a/b/'`",
        // Math spans: pipes inside $...$ are not table separators
        "Text with $|S|$ math notation here.",
        "Size $|S|$ was even, check $|T|$ too.",
        "Display $$|A| + |B|$$ math here.",
        // `$5 | $10` style price comparisons are suppressed as a deliberate trade-off:
        // the leading `$` opens a math span that swallows the pipe. Tables with bare
        // dollar amounts should use outer pipes (`| $5 | $10 |`) to be detected.
        "$5 | $10",
        // A single pipe is not enough
        "Just one |",
        "| Just one",
        // Cells containing a newline
        "| Cell with\nnewline | Other |",
    ];

    for &row in accepted.iter() {
        assert!(
            TableUtils::is_potential_table_row(row),
            "expected a potential table row: {:?}",
            row
        );
    }
    for &row in rejected.iter() {
        assert!(
            !TableUtils::is_potential_table_row(row),
            "expected NOT a potential table row: {:?}",
            row
        );
    }
}
993
#[test]
fn test_list_items_with_pipes_not_table_rows() {
    let list_items = [
        // Ordered list items should NOT be detected as table rows
        "1. Item with | pipe",
        "10. Item with | pipe",
        "999. Item with | pipe",
        "1) Item with | pipe",
        "10) Item with | pipe",
        // Unordered list items with a tab after the marker
        "-\tItem with | pipe",
        "*\tItem with | pipe",
        "+\tItem with | pipe",
        // Indented list items (the trim_start normalizes indentation)
        "  - Indented | pipe",
        "    * Deep indent | pipe",
        "  1. Ordered indent | pipe",
        // Task list items
        "- [ ] task | pipe",
        "- [x] done | pipe",
        // Multiple pipes in list items
        "1. foo | bar | baz",
        "- alpha | beta | gamma",
    ];
    for &item in list_items.iter() {
        assert!(
            !TableUtils::is_potential_table_row(item),
            "list item must not be a table row: {:?}",
            item
        );
    }

    // These SHOULD still be detected as potential table rows
    let table_rows = ["| cell | cell |", "cell | cell", "| Header | Header |"];
    for &row in table_rows.iter() {
        assert!(
            TableUtils::is_potential_table_row(row),
            "expected a potential table row: {:?}",
            row
        );
    }
}
1026
#[test]
fn test_atx_headings_with_pipes_not_table_rows() {
    // All 6 ATX heading levels with pipes
    for level in 1..=6 {
        let heading = format!("{} Heading | with pipe", "#".repeat(level));
        assert!(
            !TableUtils::is_potential_table_row(&heading),
            "heading must not be a table row: {:?}",
            heading
        );
    }

    let headings = [
        // Multiple pipes in headings
        "### col1 | col2 | col3",
        "## a|b|c",
        // Headings with a tab after the hashes
        "#\tHeading | pipe",
        "##\tHeading | pipe",
        // Only hashes and a pipe (empty heading text)
        "# |",
        "## |",
        // Indented headings (spaces before #)
        "  ## Heading | pipe",
        "   ### Heading | pipe",
        // Unicode content in headings (the original proptest failure case)
        "#### ®aAA|ᯗ",
    ];
    for &heading in headings.iter() {
        assert!(
            !TableUtils::is_potential_table_row(heading),
            "heading must not be a table row: {:?}",
            heading
        );
    }

    let table_rows = [
        // Seven or more hashes do not form a heading, so normal table detection applies
        "####### text | pipe",
        // A hash without a following space is NOT a heading, so pipe detection applies
        "#nospc|pipe",
        // These SHOULD still be detected as potential table rows
        "| # Header | Value |",
        "text | #tag",
    ];
    for &row in table_rows.iter() {
        assert!(
            TableUtils::is_potential_table_row(row),
            "expected a potential table row: {:?}",
            row
        );
    }
}
1068
#[test]
fn test_is_delimiter_row() {
    let delimiters = [
        // Basic delimiter rows, with and without alignment colons
        "|---|---|",
        "| --- | --- |",
        "|:---|---:|",
        "|:---:|:---:|",
        // Varying dash counts
        "|-|--|",
        "|-------|----------|",
        // Whitespace around the dashes
        "|  ---  |  ---  |",
        "| :--- | ---: |",
        // Multiple columns
        "|---|---|---|---|",
        // Without leading/trailing pipes
        "--- | ---",
        ":--- | ---:",
    ];
    for &row in delimiters.iter() {
        assert!(
            TableUtils::is_delimiter_row(row),
            "expected a delimiter row: {:?}",
            row
        );
    }

    let non_delimiters = [
        // Not delimiter rows
        "| Header | Header |",
        "Regular text",
        "",
        "|||",
        "| | |",
        // Must have dashes
        "| : | : |",
        "|    |    |",
        // Mixed content
        "| --- | text |",
        "| abc | --- |",
    ];
    for &row in non_delimiters.iter() {
        assert!(
            !TableUtils::is_delimiter_row(row),
            "expected NOT a delimiter row: {:?}",
            row
        );
    }
}
1107
#[test]
fn test_count_cells() {
    // (row, expected number of cells)
    let cases = [
        // Basic counts for every combination of outer pipes
        ("| Cell 1 | Cell 2 | Cell 3 |", 3),
        ("Cell 1 | Cell 2 | Cell 3", 3),
        ("| Cell 1 | Cell 2", 2),
        ("Cell 1 | Cell 2 |", 2),
        // Single cell
        ("| Cell |", 1),
        ("Cell", 0), // No pipe
        // Empty cells
        ("|  |  |  |", 3),
        ("| | | |", 3),
        // Many cells
        ("| A | B | C | D | E | F |", 6),
        // Edge cases
        ("||", 1),  // One empty cell
        ("|||", 2), // Two empty cells
        // No table
        ("Regular text", 0),
        ("", 0),
        ("   ", 0),
        // Whitespace handling
        ("  | A | B |  ", 2),
        ("|   A   |   B   |", 2),
    ];

    for &(row, expected) in cases.iter() {
        assert_eq!(TableUtils::count_cells(row), expected, "row: {:?}", row);
    }
}
1140
#[test]
fn test_count_cells_with_escaped_pipes() {
    // Pipes inside code spans are treated as content, not cell delimiters.
    // To include a literal pipe outside code spans, escape it with \|.

    // (row, expected number of cells)
    let cases = [
        // Basic table structure
        ("| Challenge | Solution |", 2),
        ("| A | B | C |", 3),
        ("| One | Two |", 2),
        // Escaped pipes: \| keeps the pipe as content
        (r"| Command | echo \| grep |", 2),
        (r"| A | B \| C |", 2), // B | C is one cell
        // Escaped pipes inside backticks
        (r"| Command | `echo \| grep` |", 2),
        // Double backslash + pipe: \\| is an escaped backslash followed by a pipe delimiter
        (r"| A | B \\| C |", 3), // \\| is NOT an escaped pipe
        // Double backslash inside backticks: the pipe is still masked by the code span
        (r"| A | `B \\| C` |", 2),
        // Pipes inside code spans are content, not delimiters
        ("| Command | `echo | grep` |", 2),
        ("| `code | one` | `code | two` |", 2),
        ("| `single|pipe` |", 1),
        // Regex example - pipes in code spans are masked
        (r"| Hour formats | `^([0-1]?\d|2[0-3])` |", 2),
        // Escaped pipe inside code is also masked (the escape is redundant here)
        (r"| Hour formats | `^([0-1]?\d\|2[0-3])` |", 2),
    ];

    for &(row, expected) in cases.iter() {
        assert_eq!(TableUtils::count_cells(row), expected, "row: {:?}", row);
    }
}
1173
#[test]
fn test_determine_pipe_style() {
    // (line, expected style)
    let cases = [
        // All pipe styles
        ("| Cell 1 | Cell 2 |", Some("leading_and_trailing")),
        ("| Cell 1 | Cell 2", Some("leading_only")),
        ("Cell 1 | Cell 2 |", Some("trailing_only")),
        ("Cell 1 | Cell 2", Some("no_leading_or_trailing")),
        // With surrounding whitespace
        ("  | Cell 1 | Cell 2 |  ", Some("leading_and_trailing")),
        ("  | Cell 1 | Cell 2  ", Some("leading_only")),
        // No pipes
        ("Regular text", None),
        ("", None),
        ("   ", None),
        // Single pipe cases
        ("|", Some("leading_and_trailing")),
        ("| Cell", Some("leading_only")),
        ("Cell |", Some("trailing_only")),
    ];

    for &(line, expected) in cases.iter() {
        assert_eq!(
            TableUtils::determine_pipe_style(line),
            expected,
            "line: {:?}",
            line
        );
    }
}
1214
#[test]
fn test_find_table_blocks_simple() {
    // A single four-line table: header, delimiter and two body rows.
    let content = concat!(
        "| Header 1 | Header 2 |\n",
        "|-----------|-----------|\n",
        "| Cell 1    | Cell 2    |\n",
        "| Cell 3    | Cell 4    |",
    );
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let tables = TableUtils::find_table_blocks(content, &ctx);
    assert_eq!(tables.len(), 1);

    let only = &tables[0];
    assert_eq!((only.start_line, only.end_line), (0, 3));
    assert_eq!((only.header_line, only.delimiter_line), (0, 1));
    assert_eq!(only.content_lines, vec![2, 3]);
}
1234
#[test]
fn test_find_table_blocks_multiple() {
    // Two tables separated by prose and blank lines.
    let content = concat!(
        "Some text\n",
        "\n",
        "| Table 1 | Col A |\n",
        "|----------|-------|\n",
        "| Data 1   | Val 1 |\n",
        "\n",
        "More text\n",
        "\n",
        "| Table 2 | Col 2 |\n",
        "|----------|-------|\n",
        "| Data 2   | Data  |",
    );
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let tables = TableUtils::find_table_blocks(content, &ctx);
    assert_eq!(tables.len(), 2);

    // First table
    let first = &tables[0];
    assert_eq!((first.start_line, first.end_line), (2, 4));
    assert_eq!((first.header_line, first.delimiter_line), (2, 3));
    assert_eq!(first.content_lines, vec![4]);

    // Second table
    let second = &tables[1];
    assert_eq!((second.start_line, second.end_line), (8, 10));
    assert_eq!((second.header_line, second.delimiter_line), (8, 9));
    assert_eq!(second.content_lines, vec![10]);
}
1268
#[test]
fn test_find_table_blocks_no_content_rows() {
    // Header and delimiter only, followed by a blank line and a paragraph.
    let content = concat!(
        "| Header 1 | Header 2 |\n",
        "|-----------|-----------|\n",
        "\n",
        "Next paragraph",
    );
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let tables = TableUtils::find_table_blocks(content, &ctx);
    assert_eq!(tables.len(), 1);

    let only = &tables[0];
    // Just header and delimiter
    assert_eq!((only.start_line, only.end_line), (0, 1));
    assert_eq!(only.content_lines.len(), 0);
}
1286
#[test]
fn test_find_table_blocks_in_code_block() {
    // A table-shaped fenced code block followed by a real table.
    let content = concat!(
        "```\n",
        "| Not | A | Table |\n",
        "|-----|---|-------|\n",
        "| In  | Code | Block |\n",
        "```\n",
        "\n",
        "| Real | Table |\n",
        "|------|-------|\n",
        "| Data | Here  |",
    );
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let tables = TableUtils::find_table_blocks(content, &ctx);
    // Only the table outside the code block
    assert_eq!(tables.len(), 1);

    let real = &tables[0];
    assert_eq!((real.header_line, real.delimiter_line), (6, 7));
}
1308
#[test]
fn test_find_table_blocks_no_tables() {
    // Prose and list items only; one list item contains a pipe.
    let content = concat!(
        "Just regular text\n",
        "No tables here\n",
        "- List item with | pipe\n",
        "* Another list item",
    );
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let tables = TableUtils::find_table_blocks(content, &ctx);
    assert_eq!(tables.len(), 0);
}
1321
#[test]
fn test_find_table_blocks_malformed() {
    // Pipe rows without a delimiter row, followed by a well-formed table.
    let content = concat!(
        "| Header without delimiter |\n",
        "| This looks like table |\n",
        "But no delimiter row\n",
        "\n",
        "| Proper | Table |\n",
        "|---------|-------|\n",
        "| Data    | Here  |",
    );
    let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let tables = TableUtils::find_table_blocks(content, &ctx);
    // Only the proper table
    assert_eq!(tables.len(), 1);
    assert_eq!(tables[0].header_line, 4);
}
1338
#[test]
fn test_edge_cases() {
    // Empty content and whitespace-only content behave the same for every helper
    for &blank in ["", "   "].iter() {
        assert!(!TableUtils::is_potential_table_row(blank), "input: {:?}", blank);
        assert!(!TableUtils::is_delimiter_row(blank), "input: {:?}", blank);
        assert_eq!(TableUtils::count_cells(blank), 0, "input: {:?}", blank);
        assert_eq!(TableUtils::determine_pipe_style(blank), None, "input: {:?}", blank);
    }

    // Single character
    let lone_pipe = "|";
    assert!(!TableUtils::is_potential_table_row(lone_pipe));
    assert!(!TableUtils::is_delimiter_row(lone_pipe));
    assert_eq!(TableUtils::count_cells(lone_pipe), 0); // Need at least 2 parts

    // Very long lines are valid table rows (no length limit),
    // for both single-column and multi-column rows
    let run_a = "a".repeat(200);
    let run_b = "b".repeat(200);
    let long_rows = [
        format!("| {} |", run_a),          // Single-column table with long content
        format!("| {} | {} |", run_a, run_b), // Multi-column table with long content
    ];
    for row in long_rows.iter() {
        assert!(TableUtils::is_potential_table_row(row.as_str()));
    }

    // Unicode
    for &row in ["| 你好 | 世界 |", "| émoji | 🎉 |"].iter() {
        assert!(TableUtils::is_potential_table_row(row), "input: {:?}", row);
    }
    assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
}
1371
#[test]
fn test_table_block_struct() {
    let original = TableBlock {
        start_line: 0,
        end_line: 5,
        header_line: 0,
        delimiter_line: 1,
        content_lines: vec![2, 3, 4, 5],
        list_context: None,
    };

    // Debug output names the type and shows field values
    let rendered = format!("{:?}", original);
    for &needle in ["TableBlock", "start_line: 0"].iter() {
        assert!(rendered.contains(needle), "missing {:?} in {:?}", needle, rendered);
    }

    // Clone yields a field-for-field copy
    let copy = original.clone();
    assert_eq!(
        (copy.start_line, copy.end_line, copy.header_line, copy.delimiter_line),
        (
            original.start_line,
            original.end_line,
            original.header_line,
            original.delimiter_line
        )
    );
    assert_eq!(copy.content_lines, original.content_lines);
    assert!(copy.list_context.is_none());
}
1397
#[test]
fn test_split_table_row() {
    // Helper: split a row and trim every resulting cell for comparison.
    fn trimmed_cells(row: &str) -> Vec<String> {
        TableUtils::split_table_row(row)
            .iter()
            .map(|cell| cell.trim().to_string())
            .collect()
    }

    // Basic split
    assert_eq!(
        trimmed_cells("| Cell 1 | Cell 2 | Cell 3 |"),
        ["Cell 1", "Cell 2", "Cell 3"]
    );

    // Without trailing pipe
    assert_eq!(TableUtils::split_table_row("| Cell 1 | Cell 2").len(), 2);

    // Empty cells
    assert_eq!(TableUtils::split_table_row("| | | |").len(), 3);

    // Single cell
    assert_eq!(trimmed_cells("| Cell |"), ["Cell"]);

    // No pipes
    assert!(TableUtils::split_table_row("No pipes here").is_empty());
}
1424
#[test]
fn test_split_table_row_with_escaped_pipes() {
    // An escaped pipe stays inside its cell
    let with_escape = TableUtils::split_table_row(r"| A | B \| C |");
    assert_eq!(with_escape.len(), 2);
    let second = &with_escape[1];
    assert!(second.contains(r"\|"), "Escaped pipe should be in cell content");

    // Double backslash + pipe is NOT an escaped pipe, so it delimits a cell
    let with_double_backslash = TableUtils::split_table_row(r"| A | B \\| C |");
    assert_eq!(with_double_backslash.len(), 3);
}
1436
#[test]
fn test_split_table_row_with_flavor_mkdocs() {
    let mkdocs = crate::config::MarkdownFlavor::MkDocs;

    // MkDocs flavor: pipes in inline code are NOT cell delimiters
    let single_pipe = TableUtils::split_table_row_with_flavor("| Type | `x | y` |", mkdocs);
    assert_eq!(single_pipe.len(), 2);
    assert!(
        single_pipe[1].contains("`x | y`"),
        "Inline code with pipe should be single cell in MkDocs flavor"
    );

    // Multiple pipes in inline code
    let several_pipes = TableUtils::split_table_row_with_flavor(
        "| Type | `a | b | c` |",
        crate::config::MarkdownFlavor::MkDocs,
    );
    assert_eq!(several_pipes.len(), 2);
    assert!(several_pipes[1].contains("`a | b | c`"));
}
1454
#[test]
fn test_split_table_row_with_flavor_standard() {
    // Pipes in inline code are NOT cell delimiters for any flavor
    let row = "| Type | `x | y` |";
    let cells = TableUtils::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard);

    assert_eq!(
        cells.len(),
        2,
        "Pipes in code spans should not be cell delimiters, got {cells:?}"
    );
    let code_cell = &cells[1];
    assert!(
        code_cell.contains("`x | y`"),
        "Inline code with pipe should be single cell"
    );
}
1470
1471    // === extract_blockquote_prefix tests ===
1472
1473    #[test]
1474    fn test_extract_blockquote_prefix_no_blockquote() {
1475        // Regular table row without blockquote
1476        let (prefix, content) = TableUtils::extract_blockquote_prefix("| H1 | H2 |");
1477        assert_eq!(prefix, "");
1478        assert_eq!(content, "| H1 | H2 |");
1479    }
1480
1481    #[test]
1482    fn test_extract_blockquote_prefix_single_level() {
1483        // Single blockquote level
1484        let (prefix, content) = TableUtils::extract_blockquote_prefix("> | H1 | H2 |");
1485        assert_eq!(prefix, "> ");
1486        assert_eq!(content, "| H1 | H2 |");
1487    }
1488
1489    #[test]
1490    fn test_extract_blockquote_prefix_double_level() {
1491        // Double blockquote level
1492        let (prefix, content) = TableUtils::extract_blockquote_prefix(">> | H1 | H2 |");
1493        assert_eq!(prefix, ">> ");
1494        assert_eq!(content, "| H1 | H2 |");
1495    }
1496
1497    #[test]
1498    fn test_extract_blockquote_prefix_triple_level() {
1499        // Triple blockquote level
1500        let (prefix, content) = TableUtils::extract_blockquote_prefix(">>> | H1 | H2 |");
1501        assert_eq!(prefix, ">>> ");
1502        assert_eq!(content, "| H1 | H2 |");
1503    }
1504
1505    #[test]
1506    fn test_extract_blockquote_prefix_with_spaces() {
1507        // Blockquote with spaces between markers
1508        let (prefix, content) = TableUtils::extract_blockquote_prefix("> > | H1 | H2 |");
1509        assert_eq!(prefix, "> > ");
1510        assert_eq!(content, "| H1 | H2 |");
1511    }
1512
1513    #[test]
1514    fn test_extract_blockquote_prefix_indented() {
1515        // Indented blockquote
1516        let (prefix, content) = TableUtils::extract_blockquote_prefix("  > | H1 | H2 |");
1517        assert_eq!(prefix, "  > ");
1518        assert_eq!(content, "| H1 | H2 |");
1519    }
1520
1521    #[test]
1522    fn test_extract_blockquote_prefix_no_space_after() {
1523        // Blockquote without space after marker
1524        let (prefix, content) = TableUtils::extract_blockquote_prefix(">| H1 | H2 |");
1525        assert_eq!(prefix, ">");
1526        assert_eq!(content, "| H1 | H2 |");
1527    }
1528
1529    #[test]
1530    fn test_determine_pipe_style_in_blockquote() {
1531        // determine_pipe_style should handle blockquotes correctly
1532        assert_eq!(
1533            TableUtils::determine_pipe_style("> | H1 | H2 |"),
1534            Some("leading_and_trailing")
1535        );
1536        assert_eq!(
1537            TableUtils::determine_pipe_style("> H1 | H2"),
1538            Some("no_leading_or_trailing")
1539        );
1540        assert_eq!(
1541            TableUtils::determine_pipe_style(">> | H1 | H2 |"),
1542            Some("leading_and_trailing")
1543        );
1544        assert_eq!(TableUtils::determine_pipe_style(">>> | H1 | H2"), Some("leading_only"));
1545    }
1546
1547    #[test]
1548    fn test_list_table_delimiter_requires_indentation() {
1549        // Test case: list item contains pipe, but delimiter line is at column 1
1550        // This should NOT be detected as a list table since the delimiter has no indentation.
1551        // The result is a non-list table starting at line 0 (the list item becomes the header)
1552        // but list_context should be None.
1553        let content = "- List item with | pipe\n|---|---|\n| Cell 1 | Cell 2 |";
1554        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1555        let tables = TableUtils::find_table_blocks(content, &ctx);
1556
1557        // The table will be detected starting at line 0, but crucially it should NOT have
1558        // list_context set, meaning it won't be treated as a list-table for column count purposes
1559        assert_eq!(tables.len(), 1, "Should find exactly one table");
1560        assert!(
1561            tables[0].list_context.is_none(),
1562            "Should NOT have list context since delimiter has no indentation"
1563        );
1564    }
1565
1566    #[test]
1567    fn test_list_table_with_properly_indented_delimiter() {
1568        // Test case: list item with table header, delimiter properly indented
1569        // This SHOULD be detected as a list table
1570        let content = "- | Header 1 | Header 2 |\n  |----------|----------|\n  | Cell 1   | Cell 2   |";
1571        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1572        let tables = TableUtils::find_table_blocks(content, &ctx);
1573
1574        // Should find exactly one list-table starting at line 0
1575        assert_eq!(tables.len(), 1, "Should find exactly one table");
1576        assert_eq!(tables[0].start_line, 0, "Table should start at list item line");
1577        assert!(
1578            tables[0].list_context.is_some(),
1579            "Should be a list table since delimiter is properly indented"
1580        );
1581    }
1582
1583    #[test]
1584    fn test_mask_pipes_in_inline_code_regular_backticks() {
1585        // Regular backtick code span: pipe should be masked
1586        let result = TableUtils::mask_pipes_in_inline_code("| `code | here` |");
1587        assert_eq!(result, "| `code _ here` |");
1588    }
1589
1590    #[test]
1591    fn test_mask_pipes_in_inline_code_escaped_backtick_not_code_span() {
1592        // Escaped backtick (\`) is literal text, not a code span opener.
1593        // The pipe should NOT be masked.
1594        let result = TableUtils::mask_pipes_in_inline_code(r"| \`not code | still pipe\` |");
1595        assert_eq!(result, r"| \`not code | still pipe\` |");
1596    }
1597
1598    #[test]
1599    fn test_mask_pipes_in_inline_code_escaped_backslash_then_backtick() {
1600        // Escaped backslash (\\) followed by backtick: the backtick IS a code span opener.
1601        // The pipe inside the code span SHOULD be masked.
1602        let result = TableUtils::mask_pipes_in_inline_code(r"| \\`real code | masked\\` |");
1603        // \\` = escaped backslash + real backtick (code span opener)
1604        // The pipe between the backticks should be masked
1605        assert_eq!(result, r"| \\`real code _ masked\\` |");
1606    }
1607
1608    #[test]
1609    fn test_mask_pipes_in_inline_code_triple_backslash_before_backtick() {
1610        // Three backslashes before backtick: odd count means backtick is escaped
1611        let result = TableUtils::mask_pipes_in_inline_code(r"| \\\`not code | pipe\\\` |");
1612        assert_eq!(result, r"| \\\`not code | pipe\\\` |");
1613    }
1614
1615    #[test]
1616    fn test_mask_pipes_in_inline_code_four_backslashes_before_backtick() {
1617        // Four backslashes before backtick: even count means backtick is a real delimiter
1618        let result = TableUtils::mask_pipes_in_inline_code(r"| \\\\`code | here\\\\` |");
1619        assert_eq!(result, r"| \\\\`code _ here\\\\` |");
1620    }
1621
1622    #[test]
1623    fn test_mask_pipes_in_inline_code_no_backslash() {
1624        // No backslashes at all: standard behavior, pipe inside code span is masked
1625        let result = TableUtils::mask_pipes_in_inline_code("before `a | b` after");
1626        assert_eq!(result, "before `a _ b` after");
1627    }
1628
1629    #[test]
1630    fn test_mask_pipes_in_inline_code_no_code_span() {
1631        // No backticks at all: nothing should be masked
1632        let result = TableUtils::mask_pipes_in_inline_code("| col1 | col2 |");
1633        assert_eq!(result, "| col1 | col2 |");
1634    }
1635
1636    #[test]
1637    fn test_mask_pipes_in_inline_code_backslash_before_closing_backtick() {
1638        // Per CommonMark spec, backslash escapes do NOT work inside code spans.
1639        // Inside a code span, `\` is a literal character. So `foo\` is a valid
1640        // code span containing "foo\", and the closing backtick is NOT escaped.
1641        //
1642        // Input: | `foo\` | bar |
1643        // The code span is `foo\` (backtick opens, backslash is literal, backtick closes).
1644        // The pipe after the code span is a real delimiter, producing 2 cells.
1645        // The pipe inside the code span should be left alone (there isn't one here).
1646        let result = TableUtils::mask_pipes_in_inline_code(r"| `foo\` | bar |");
1647        // The backslash before closing backtick is literal inside the code span,
1648        // so the code span closes at that backtick. The pipe between cells is NOT masked.
1649        assert_eq!(result, r"| `foo\` | bar |");
1650    }
1651
1652    #[test]
1653    fn test_mask_pipes_in_inline_code_backslash_literal_with_pipe_inside() {
1654        // Code span contains a backslash and a pipe: `a\|b`
1655        // The backslash is literal inside the code span (CommonMark spec).
1656        // The pipe is inside the code span, so it should be masked.
1657        let result = TableUtils::mask_pipes_in_inline_code(r"| `a\|b` | col2 |");
1658        assert_eq!(result, r"| `a\_b` | col2 |");
1659    }
1660
1661    #[test]
1662    fn test_count_preceding_backslashes() {
1663        let chars: Vec<char> = r"abc\\\`def".chars().collect();
1664        // Position of backtick is at index 6 (a=0, b=1, c=2, \=3, \=4, \=5, `=6)
1665        assert_eq!(TableUtils::count_preceding_backslashes(&chars, 6), 3);
1666
1667        let chars2: Vec<char> = r"abc\\`def".chars().collect();
1668        // Position of backtick is at index 5
1669        assert_eq!(TableUtils::count_preceding_backslashes(&chars2, 5), 2);
1670
1671        let chars3: Vec<char> = "`def".chars().collect();
1672        // Position of backtick is at index 0 -- no preceding chars
1673        assert_eq!(TableUtils::count_preceding_backslashes(&chars3, 0), 0);
1674    }
1675
1676    #[test]
1677    fn test_has_unescaped_pipe_backslash_literal_in_code_span() {
1678        // Per CommonMark: backslashes are literal inside code spans.
1679        // `foo\` is a complete code span, so the pipe after it is outside code.
1680        assert!(TableUtils::has_unescaped_pipe_outside_spans(r"`foo\` | bar"));
1681
1682        // Escaped backtick outside code span: \` is not a code span opener
1683        assert!(TableUtils::has_unescaped_pipe_outside_spans(r"\`foo | bar\`"));
1684
1685        // Pipe inside code span should not count
1686        assert!(!TableUtils::has_unescaped_pipe_outside_spans(r"`foo | bar`"));
1687    }
1688
1689    #[test]
1690    fn test_table_after_code_span_detected() {
1691        use crate::config::MarkdownFlavor;
1692
1693        let content = "`code`\n\n| A | B |\n|---|---|\n| 1 | 2 |\n";
1694        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1695        assert!(!ctx.table_blocks.is_empty(), "Table after code span should be detected");
1696    }
1697
1698    #[test]
1699    fn test_table_inside_html_comment_not_detected() {
1700        use crate::config::MarkdownFlavor;
1701
1702        let content = "<!--\n| A | B |\n|---|---|\n| 1 | 2 |\n-->\n";
1703        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1704        assert!(
1705            ctx.table_blocks.is_empty(),
1706            "Table inside HTML comment should not be detected"
1707        );
1708    }
1709}