Skip to main content

rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).
5/// Represents a table block in the document
6#[derive(Debug, Clone)]
7pub struct TableBlock {
8    pub start_line: usize,
9    pub end_line: usize,
10    pub header_line: usize,
11    pub delimiter_line: usize,
12    pub content_lines: Vec<usize>,
13    /// If the table is inside a list item, this contains:
14    /// - The list marker prefix for the header line (e.g., "- ", "1. ")
15    /// - The content indent (number of spaces for continuation lines)
16    pub list_context: Option<ListTableContext>,
17}
18
/// Describes how a table is embedded in a list item.
#[derive(Debug, Clone)]
pub struct ListTableContext {
    /// Prefix of the table's header line. For a table that starts on the list marker line
    /// this is the marker including any leading whitespace (e.g., "- ", "  1. "); for a
    /// table on continuation lines it is `content_indent` spaces.
    pub list_prefix: String,
    /// Number of spaces continuation lines need in order to align with the item's content.
    pub content_indent: usize,
}
27
/// Namespace type for the shared table detection and parsing helpers.
///
/// It holds no data; all functionality is exposed as associated functions.
pub struct TableUtils;
30
31impl TableUtils {
/// Reports whether `text` contains at least one pipe that is neither backslash-escaped nor
/// located inside an inline code span or a math span.
///
/// Backtick code spans (`` `...` ``) and dollar-sign math spans (`$...$`, `$$...$$`) are
/// skipped so that prose such as `` `echo a | sed 's/a/b/'` `` or math such as `$|S|$`
/// (absolute value notation) does not count as a table separator.
///
/// A span is closed only by a delimiter run of the same length as the one that opened it.
/// Consequently a lone `$` (or backtick run) without a matching closer keeps the scanner
/// inside the span until the end of the line and hides every later pipe. This is a
/// deliberately conservative trade-off: `$5 | $10`-style price comparisons (without outer
/// pipes) are not detected as separators, in exchange for no false positives on real math.
fn has_unescaped_pipe_outside_spans(text: &str) -> bool {
    let symbols: Vec<char> = text.chars().collect();
    let total = symbols.len();

    // Length of the delimiter run that opened the active span; `None` while outside.
    let mut code_span: Option<usize> = None;
    let mut math_span: Option<usize> = None;
    let mut idx = 0;

    while idx < total {
        let outside = code_span.is_none() && math_span.is_none();

        match symbols[idx] {
            // Backslash escapes only apply outside spans; inside a code span the
            // backslash is a literal character per CommonMark.
            '\\' if outside => {
                idx += if idx + 1 < total { 2 } else { 1 };
            }
            '`' if math_span.is_none() => {
                let run = symbols[idx..].iter().take_while(|&&c| c == '`').count();
                code_span = match code_span {
                    Some(open) if open == run => None,
                    // A run of a different length is consumed but leaves the span open.
                    Some(open) => Some(open),
                    None => Some(run),
                };
                idx += run;
            }
            '$' if code_span.is_none() => {
                let run = symbols[idx..].iter().take_while(|&&c| c == '$').count();
                math_span = match math_span {
                    Some(open) if open == run => None,
                    // A run of a different length is consumed but leaves the span open.
                    Some(open) => Some(open),
                    None => Some(run),
                };
                idx += run;
            }
            '|' if outside => return true,
            _ => idx += 1,
        }
    }

    false
}
112
113    /// Check if a line looks like a potential table row
114    pub fn is_potential_table_row(line: &str) -> bool {
115        let trimmed = line.trim();
116        if trimmed.is_empty() || !trimmed.contains('|') {
117            return false;
118        }
119
120        // Skip lines that are clearly not table rows
121        // Unordered list items with space or tab after marker
122        if trimmed.starts_with("- ")
123            || trimmed.starts_with("* ")
124            || trimmed.starts_with("+ ")
125            || trimmed.starts_with("-\t")
126            || trimmed.starts_with("*\t")
127            || trimmed.starts_with("+\t")
128        {
129            return false;
130        }
131
132        // Skip ordered list items: digits followed by . or ) then space/tab
133        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
134            && first_non_digit > 0
135        {
136            let after_digits = &trimmed[first_non_digit..];
137            if after_digits.starts_with(". ")
138                || after_digits.starts_with(".\t")
139                || after_digits.starts_with(") ")
140                || after_digits.starts_with(")\t")
141            {
142                return false;
143            }
144        }
145
146        // Skip ATX headings (# through ######)
147        if trimmed.starts_with('#') {
148            let hash_count = trimmed.bytes().take_while(|&b| b == b'#').count();
149            if hash_count <= 6 {
150                let after_hashes = &trimmed[hash_count..];
151                if after_hashes.is_empty() || after_hashes.starts_with(' ') || after_hashes.starts_with('\t') {
152                    return false;
153                }
154            }
155        }
156
157        // For rows without explicit outer pipes, require a real separator outside
158        // inline code and math spans to avoid prose/command false positives.
159        let has_outer_pipes = trimmed.starts_with('|') && trimmed.ends_with('|');
160        if !has_outer_pipes && !Self::has_unescaped_pipe_outside_spans(trimmed) {
161            return false;
162        }
163
164        // Must have at least 2 parts when split by |
165        let parts: Vec<&str> = trimmed.split('|').collect();
166        if parts.len() < 2 {
167            return false;
168        }
169
170        // Check if it looks like a table row by having reasonable content between pipes
171        let mut valid_parts = 0;
172        let mut total_non_empty_parts = 0;
173
174        for part in &parts {
175            let part_trimmed = part.trim();
176            // Skip empty parts (from leading/trailing pipes)
177            if part_trimmed.is_empty() {
178                continue;
179            }
180            total_non_empty_parts += 1;
181
182            // Count parts that look like table cells (reasonable content, no newlines)
183            if !part_trimmed.contains('\n') {
184                valid_parts += 1;
185            }
186        }
187
188        // Check if all non-empty parts are valid (no newlines)
189        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
190            // Some cells contain newlines, not a valid table row
191            return false;
192        }
193
194        // GFM allows tables with all empty cells (e.g., |||)
195        // These are valid if they have proper table formatting (leading and trailing pipes)
196        if total_non_empty_parts == 0 {
197            // Empty cells are only valid with proper pipe formatting
198            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
199        }
200
201        // GFM allows single-column tables, so >= 1 valid part is enough
202        // when the line has proper table formatting (pipes)
203        if trimmed.starts_with('|') && trimmed.ends_with('|') {
204            // Properly formatted table row with pipes on both ends
205            valid_parts >= 1
206        } else {
207            // For rows without proper pipe formatting, require at least 2 cells
208            valid_parts >= 2
209        }
210    }
211
/// Check if a line is a table delimiter row (e.g., `|---|---|` or `| :-- | :-: | --: |`).
///
/// Surrounding whitespace is ignored. The line must contain at least one pipe, and every
/// non-empty cell must have the GFM delimiter shape: an optional leading `:`, one or more
/// `-`, and an optional trailing `:`. Whitespace around a cell is allowed; whitespace or
/// colons in the middle of a cell (`- -`, `-:-`) are not, because GFM does not treat such
/// lines as delimiter rows.
///
/// Empty cells (from leading/trailing pipes or from `||`) are skipped. Returns `false`
/// when no non-empty cell is present.
pub fn is_delimiter_row(line: &str) -> bool {
    /// Matches a single trimmed delimiter cell: `:?-+:?`.
    fn is_delimiter_cell(cell: &str) -> bool {
        let dashes = cell.strip_prefix(':').unwrap_or(cell);
        let dashes = dashes.strip_suffix(':').unwrap_or(dashes);
        !dashes.is_empty() && dashes.bytes().all(|b| b == b'-')
    }

    let trimmed = line.trim();
    if !trimmed.contains('|') || !trimmed.contains('-') {
        return false;
    }

    let mut cells = trimmed
        .split('|')
        .map(str::trim)
        .filter(|cell| !cell.is_empty())
        .peekable();

    // At least one non-empty cell is required, and all of them must be valid delimiters.
    cells.peek().is_some() && cells.all(is_delimiter_cell)
}
241
/// Removes leading blockquote markers from `line` and returns what follows them.
///
/// Every `>` marker is removed together with the spaces after it, so nested quotes
/// (`> > text`) are fully unwrapped. A line that does not start with `>` (after leading
/// whitespace) is returned untouched, leading whitespace included.
fn strip_blockquote_prefix(line: &str) -> &str {
    let mut remainder = line.trim_start();
    if !remainder.starts_with('>') {
        return line;
    }

    while let Some(after_marker) = remainder.strip_prefix('>') {
        remainder = after_marker.trim_start_matches(' ');
    }
    remainder
}
257
258    /// Find all table blocks in the content with optimized detection
259    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
260    pub fn find_table_blocks_with_code_info(
261        content: &str,
262        code_blocks: &[(usize, usize)],
263        code_spans: &[crate::lint_context::CodeSpan],
264        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
265    ) -> Vec<TableBlock> {
266        let lines: Vec<&str> = content.lines().collect();
267        let mut tables = Vec::new();
268        let mut i = 0;
269
270        // Pre-compute line positions for efficient code block checking
271        let mut line_positions = Vec::with_capacity(lines.len());
272        let mut pos = 0;
273        for line in &lines {
274            line_positions.push(pos);
275            pos += line.len() + 1; // +1 for newline
276        }
277
278        // Stack of active list content indents for continuation table tracking.
279        // Supports nested lists: when a child list is seen, we push; when we
280        // dedent past a level, we pop back to the enclosing list.
281        let mut list_indent_stack: Vec<usize> = Vec::new();
282
283        while i < lines.len() {
284            // Skip lines in code blocks, code spans, or HTML comments
285            let line_start = line_positions[i];
286            let in_code =
287                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start) || {
288                    // Binary search on sorted code spans
289                    let idx = code_spans.partition_point(|span| span.byte_offset <= line_start);
290                    idx > 0 && line_start < code_spans[idx - 1].byte_end
291                };
292            let in_html_comment = {
293                // Binary search on sorted HTML comment ranges
294                let idx = html_comment_ranges.partition_point(|range| range.start <= line_start);
295                idx > 0 && line_start < html_comment_ranges[idx - 1].end
296            };
297
298            if in_code || in_html_comment {
299                i += 1;
300                continue;
301            }
302
303            // Strip blockquote prefix for table detection
304            let line_content = Self::strip_blockquote_prefix(lines[i]);
305
306            // Update active list tracking
307            let (list_prefix, list_content, content_indent) = Self::extract_list_prefix(line_content);
308            if !list_prefix.is_empty() {
309                // Line has a list marker. Pop any deeper/equal levels, then push this one.
310                while list_indent_stack.last().is_some_and(|&top| top >= content_indent) {
311                    list_indent_stack.pop();
312                }
313                list_indent_stack.push(content_indent);
314            } else if !line_content.trim().is_empty() {
315                // Non-blank line without a marker: pop any levels we've dedented past
316                let leading = line_content.len() - line_content.trim_start().len();
317                while list_indent_stack.last().is_some_and(|&top| leading < top) {
318                    list_indent_stack.pop();
319                }
320            }
321            // Blank lines keep the stack unchanged (blank lines don't end list items)
322
323            // Check if this is a list item that contains a table row on the same line,
324            // or a continuation table indented under an active list item
325            let (is_same_line_list_table, effective_content) =
326                if !list_prefix.is_empty() && Self::is_potential_table_row_content(list_content) {
327                    (true, list_content)
328                } else {
329                    (false, line_content)
330                };
331
332            // Detect continuation list tables: no marker on this line, but indented
333            // under an active list item (e.g., "- Text\n  | h1 | h2 |")
334            let continuation_indent = if !is_same_line_list_table && list_prefix.is_empty() {
335                let leading = line_content.len() - line_content.trim_start().len();
336                // Find the deepest list level this line is indented under
337                list_indent_stack
338                    .iter()
339                    .rev()
340                    .find(|&&indent| leading >= indent)
341                    .copied()
342            } else {
343                None
344            };
345
346            let is_continuation_list_table = continuation_indent.is_some()
347                && {
348                    let indent = continuation_indent.unwrap();
349                    let leading = line_content.len() - line_content.trim_start().len();
350                    // Per CommonMark, 4+ spaces beyond content indent is a code block
351                    leading < indent + 4
352                }
353                && Self::is_potential_table_row(effective_content);
354
355            let is_any_list_table = is_same_line_list_table || is_continuation_list_table;
356
357            // For continuation list tables, use the matched list indent
358            let effective_content_indent = if is_same_line_list_table {
359                content_indent
360            } else if is_continuation_list_table {
361                continuation_indent.unwrap()
362            } else {
363                0
364            };
365
366            // Look for potential table start
367            if is_any_list_table || Self::is_potential_table_row(effective_content) {
368                // For list tables (same-line or continuation), check indented continuation lines
369                // For regular tables, check the next line directly
370                let (next_line_content, delimiter_has_valid_indent) = if i + 1 < lines.len() {
371                    let next_raw = Self::strip_blockquote_prefix(lines[i + 1]);
372                    if is_any_list_table {
373                        // Verify the delimiter line has proper indentation
374                        let leading_spaces = next_raw.len() - next_raw.trim_start().len();
375                        if leading_spaces >= effective_content_indent {
376                            // Has proper indentation, strip it and check as delimiter
377                            (
378                                Self::strip_list_continuation_indent(next_raw, effective_content_indent),
379                                true,
380                            )
381                        } else {
382                            // Not enough indentation - not a list table
383                            (next_raw, false)
384                        }
385                    } else {
386                        (next_raw, true)
387                    }
388                } else {
389                    ("", true)
390                };
391
392                // For list tables, only accept if delimiter has valid indentation
393                let effective_is_list_table = is_any_list_table && delimiter_has_valid_indent;
394
395                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
396                    // Found a table! Find its end
397                    let table_start = i;
398                    let header_line = i;
399                    let delimiter_line = i + 1;
400                    let mut table_end = i + 1; // Include the delimiter row
401                    let mut content_lines = Vec::new();
402
403                    // Continue while we have table rows
404                    let mut j = i + 2;
405                    while j < lines.len() {
406                        let line = lines[j];
407                        // Strip blockquote prefix for checking
408                        let raw_content = Self::strip_blockquote_prefix(line);
409
410                        // For list tables, strip expected indentation
411                        let line_content = if effective_is_list_table {
412                            Self::strip_list_continuation_indent(raw_content, effective_content_indent)
413                        } else {
414                            raw_content
415                        };
416
417                        if line_content.trim().is_empty() {
418                            // Empty line ends the table
419                            break;
420                        }
421
422                        // For list tables, the continuation line must have proper indentation
423                        if effective_is_list_table {
424                            let leading_spaces = raw_content.len() - raw_content.trim_start().len();
425                            if leading_spaces < effective_content_indent {
426                                // Not enough indentation - end of table
427                                break;
428                            }
429                        }
430
431                        if Self::is_potential_table_row(line_content) {
432                            content_lines.push(j);
433                            table_end = j;
434                            j += 1;
435                        } else {
436                            // Non-table line ends the table
437                            break;
438                        }
439                    }
440
441                    let list_context = if effective_is_list_table {
442                        if is_same_line_list_table {
443                            // Same-line: prefix is the actual list marker (e.g., "- ")
444                            Some(ListTableContext {
445                                list_prefix: list_prefix.to_string(),
446                                content_indent: effective_content_indent,
447                            })
448                        } else {
449                            // Continuation: prefix is the indentation spaces
450                            Some(ListTableContext {
451                                list_prefix: " ".repeat(effective_content_indent),
452                                content_indent: effective_content_indent,
453                            })
454                        }
455                    } else {
456                        None
457                    };
458
459                    tables.push(TableBlock {
460                        start_line: table_start,
461                        end_line: table_end,
462                        header_line,
463                        delimiter_line,
464                        content_lines,
465                        list_context,
466                    });
467                    i = table_end + 1;
468                } else {
469                    i += 1;
470                }
471            } else {
472                i += 1;
473            }
474        }
475
476        tables
477    }
478
/// Removes the indentation that a list item's continuation line is expected to carry.
///
/// Leading spaces and tabs are consumed until `expected_indent` columns are covered or a
/// non-whitespace byte is reached, whichever comes first. A space covers one column; a tab
/// advances to the next multiple of four. A tab that crosses the expected indent is
/// removed as a whole. Lines with less indentation than expected simply lose what they
/// have.
fn strip_list_continuation_indent(line: &str, expected_indent: usize) -> &str {
    let mut columns = 0usize;
    let mut cut = 0usize;

    for byte in line.bytes() {
        if columns >= expected_indent {
            break;
        }
        match byte {
            b' ' => columns += 1,
            // Tab counts as up to 4 spaces, rounding up to the next multiple of 4
            b'\t' => columns = (columns / 4 + 1) * 4,
            _ => break,
        }
        // Spaces and tabs are single bytes, so `cut` always lands on a char boundary.
        cut += 1;
    }

    &line[cut..]
}
522
523    /// Find all table blocks in the content with optimized detection
524    /// This is a backward-compatible wrapper that accepts LintContext
525    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
526        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
527    }
528
529    /// Count the number of cells in a table row
530    pub fn count_cells(row: &str) -> usize {
531        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
532    }
533
534    /// Count the number of cells in a table row with flavor-specific behavior
535    ///
536    /// Pipes inside code spans are treated as content, not cell delimiters.
537    ///
538    /// This function strips blockquote prefixes before counting cells, so it works
539    /// correctly for tables inside blockquotes.
540    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
541        // Strip blockquote prefix if present before counting cells
542        let (_, content) = Self::extract_blockquote_prefix(row);
543        Self::split_table_row_with_flavor(content, flavor).len()
544    }
545
/// Returns the length of the unbroken run of backslashes that ends right before index
/// `pos` in `chars`.
///
/// Panics if `pos` is greater than `chars.len()`.
fn count_preceding_backslashes(chars: &[char], pos: usize) -> usize {
    chars[..pos].iter().rev().take_while(|&&c| c == '\\').count()
}
560
561    /// Mask pipes inside inline code blocks with a placeholder character.
562    ///
563    /// Backticks preceded by an odd number of backslashes are escaped (literal text)
564    /// and do not open or close code spans. An even number of backslashes means the
565    /// backslashes themselves are escaped, so the backtick is a real delimiter.
566    pub fn mask_pipes_in_inline_code(text: &str) -> String {
567        let mut result = String::new();
568        let chars: Vec<char> = text.chars().collect();
569        let mut i = 0;
570
571        while i < chars.len() {
572            if chars[i] == '`' {
573                // A backtick preceded by an odd number of backslashes is escaped
574                let preceding = Self::count_preceding_backslashes(&chars, i);
575                if preceding % 2 != 0 {
576                    // Escaped backtick -- treat as literal text, not a code span opener
577                    result.push(chars[i]);
578                    i += 1;
579                    continue;
580                }
581
582                // Count consecutive backticks at start
583                let start = i;
584                let mut backtick_count = 0;
585                while i < chars.len() && chars[i] == '`' {
586                    backtick_count += 1;
587                    i += 1;
588                }
589
590                // Look for matching closing backticks
591                let mut found_closing = false;
592                let mut j = i;
593
594                while j < chars.len() {
595                    if chars[j] == '`' {
596                        // Per CommonMark spec, backslash escapes do NOT work inside code
597                        // spans -- all characters including backslashes are literal. So we
598                        // do NOT check count_preceding_backslashes here (only for the
599                        // opening backtick above).
600
601                        // Count potential closing backticks
602                        let close_start = j;
603                        let mut close_count = 0;
604                        while j < chars.len() && chars[j] == '`' {
605                            close_count += 1;
606                            j += 1;
607                        }
608
609                        if close_count == backtick_count {
610                            // Found matching closing backticks
611                            found_closing = true;
612
613                            // Valid inline code - add with pipes masked
614                            result.extend(chars[start..i].iter());
615
616                            for &ch in chars.iter().take(close_start).skip(i) {
617                                if ch == '|' {
618                                    result.push('_'); // Mask pipe with underscore
619                                } else {
620                                    result.push(ch);
621                                }
622                            }
623
624                            result.extend(chars[close_start..j].iter());
625                            i = j;
626                            break;
627                        }
628                        // If not matching, continue searching (j is already past these backticks)
629                    } else {
630                        j += 1;
631                    }
632                }
633
634                if !found_closing {
635                    // No matching closing found, treat as regular text
636                    result.extend(chars[start..i].iter());
637                }
638            } else {
639                result.push(chars[i]);
640                i += 1;
641            }
642        }
643
644        result
645    }
646
/// Replaces backslash-escaped pipes with `_` so they are not mistaken for cell delimiters.
///
/// In GFM tables, escapes are resolved BEFORE cell boundaries are determined:
/// - `\|` is an escaped pipe: the pipe is masked and stays part of the cell content
/// - `\\|` is an escaped backslash followed by a pipe: the pipe is left as a delimiter
///
/// Only escaped pipes are handled here; pipes inside inline code spans are the job of
/// `mask_pipes_in_inline_code`. The output has the same byte length as the input.
pub fn mask_pipes_for_table_parsing(text: &str) -> String {
    let mut masked = String::new();
    let mut stream = text.chars().peekable();

    while let Some(current) = stream.next() {
        if current != '\\' {
            masked.push(current);
            continue;
        }

        match stream.peek().copied() {
            Some('\\') => {
                // Escaped backslash: keep both, so a following pipe stays a real delimiter.
                stream.next();
                masked.push_str("\\\\");
            }
            Some('|') => {
                // Escaped pipe: keep the backslash, mask the pipe.
                stream.next();
                masked.push_str("\\_");
            }
            // Backslash before anything else (or at the end of the text) is copied as is.
            _ => masked.push('\\'),
        }
    }

    masked
}
686
687    /// Split a table row into individual cell contents with flavor-specific behavior.
688    ///
689    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
690    /// This is the foundation for both cell counting and cell content extraction.
691    ///
692    /// Pipes inside code spans are treated as content, not cell delimiters.
693    pub fn split_table_row_with_flavor(row: &str, _flavor: crate::config::MarkdownFlavor) -> Vec<String> {
694        let trimmed = row.trim();
695
696        if !trimmed.contains('|') {
697            return Vec::new();
698        }
699
700        // First, mask escaped pipes (same for all flavors)
701        let masked = Self::mask_pipes_for_table_parsing(trimmed);
702
703        // Mask pipes inside inline code for all flavors
704        let final_masked = Self::mask_pipes_in_inline_code(&masked);
705
706        let has_leading = final_masked.starts_with('|');
707        let has_trailing = final_masked.ends_with('|');
708
709        let mut masked_content = final_masked.as_str();
710        let mut orig_content = trimmed;
711
712        if has_leading {
713            masked_content = &masked_content[1..];
714            orig_content = &orig_content[1..];
715        }
716
717        // Track whether we actually strip a trailing pipe
718        let stripped_trailing = has_trailing && !masked_content.is_empty();
719        if stripped_trailing {
720            masked_content = &masked_content[..masked_content.len() - 1];
721            orig_content = &orig_content[..orig_content.len() - 1];
722        }
723
724        // Handle edge cases for degenerate inputs
725        if masked_content.is_empty() {
726            if stripped_trailing {
727                // "||" case: two pipes with empty content between = one empty cell
728                return vec![String::new()];
729            } else {
730                // "|" case: single pipe, not a valid table row
731                return Vec::new();
732            }
733        }
734
735        let masked_parts: Vec<&str> = masked_content.split('|').collect();
736        let mut cells = Vec::new();
737        let mut pos = 0;
738
739        for masked_cell in masked_parts {
740            let cell_len = masked_cell.len();
741            let orig_cell = if pos + cell_len <= orig_content.len() {
742                &orig_content[pos..pos + cell_len]
743            } else {
744                masked_cell
745            };
746            cells.push(orig_cell.to_string());
747            pos += cell_len + 1; // +1 for the pipe delimiter
748        }
749
750        cells
751    }
752
753    /// Split a table row into individual cell contents using Standard/GFM behavior.
754    pub fn split_table_row(row: &str) -> Vec<String> {
755        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
756    }
757
758    /// Determine the pipe style of a table row
759    ///
760    /// Handles tables inside blockquotes by stripping the blockquote prefix
761    /// before analyzing the pipe style.
762    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
763        // Strip blockquote prefix if present before analyzing pipe style
764        let content = Self::strip_blockquote_prefix(line);
765        let trimmed = content.trim();
766        if !trimmed.contains('|') {
767            return None;
768        }
769
770        let has_leading = trimmed.starts_with('|');
771        let has_trailing = trimmed.ends_with('|');
772
773        match (has_leading, has_trailing) {
774            (true, true) => Some("leading_and_trailing"),
775            (true, false) => Some("leading_only"),
776            (false, true) => Some("trailing_only"),
777            (false, false) => Some("no_leading_or_trailing"),
778        }
779    }
780
781    /// Extract blockquote prefix from a line, returning (prefix, content).
782    ///
783    /// This is useful for stripping the prefix before processing, then restoring it after.
784    /// For example: `"> | H1 | H2 |"` returns `("> ", "| H1 | H2 |")`.
785    pub fn extract_blockquote_prefix(line: &str) -> (&str, &str) {
786        // Find where the actual content starts (after blockquote markers and spaces)
787        let bytes = line.as_bytes();
788        let mut pos = 0;
789
790        // Skip leading whitespace (indent before blockquote marker)
791        while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
792            pos += 1;
793        }
794
795        // If no blockquote marker, return empty prefix
796        if pos >= bytes.len() || bytes[pos] != b'>' {
797            return ("", line);
798        }
799
800        // Skip all blockquote markers and spaces
801        while pos < bytes.len() {
802            if bytes[pos] == b'>' {
803                pos += 1;
804                // Skip optional space after >
805                if pos < bytes.len() && bytes[pos] == b' ' {
806                    pos += 1;
807                }
808            } else if bytes[pos] == b' ' || bytes[pos] == b'\t' {
809                pos += 1;
810            } else {
811                break;
812            }
813        }
814
815        // Split at the position where content starts
816        (&line[..pos], &line[pos..])
817    }
818
819    /// Extract list marker prefix from a line, returning (prefix, content, content_indent).
820    ///
821    /// This handles unordered list markers (`-`, `*`, `+`) and ordered list markers (`1.`, `10)`, etc.)
822    /// Returns:
823    /// - prefix: The list marker including any leading whitespace and trailing space (e.g., "- ", "  1. ")
824    /// - content: The content after the list marker
825    /// - content_indent: The number of spaces needed for continuation lines to align with content
826    ///
827    /// For example:
828    /// - `"- | H1 | H2 |"` returns `("- ", "| H1 | H2 |", 2)`
829    /// - `"1. | H1 | H2 |"` returns `("1. ", "| H1 | H2 |", 3)`
830    /// - `"  - table"` returns `("  - ", "table", 4)`
831    ///
832    /// Returns `("", line, 0)` if the line doesn't start with a list marker.
833    pub fn extract_list_prefix(line: &str) -> (&str, &str, usize) {
834        let bytes = line.as_bytes();
835
836        // Skip leading whitespace
837        let leading_spaces = bytes.iter().take_while(|&&b| b == b' ' || b == b'\t').count();
838        let mut pos = leading_spaces;
839
840        if pos >= bytes.len() {
841            return ("", line, 0);
842        }
843
844        // Check for unordered list marker: -, *, +
845        if matches!(bytes[pos], b'-' | b'*' | b'+') {
846            pos += 1;
847
848            // Must be followed by space or tab (or end of line for marker-only lines)
849            if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
850                // Skip the space after marker if present
851                if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
852                    pos += 1;
853                }
854                let content_indent = pos;
855                return (&line[..pos], &line[pos..], content_indent);
856            }
857            // Not a list marker (e.g., "-word" or "--")
858            return ("", line, 0);
859        }
860
861        // Check for ordered list marker: digits followed by . or ) then space
862        if bytes[pos].is_ascii_digit() {
863            let digit_start = pos;
864            while pos < bytes.len() && bytes[pos].is_ascii_digit() {
865                pos += 1;
866            }
867
868            // Must have at least one digit
869            if pos > digit_start && pos < bytes.len() {
870                // Check for . or ) followed by space/tab
871                if bytes[pos] == b'.' || bytes[pos] == b')' {
872                    pos += 1;
873                    if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
874                        // Skip the space after marker if present
875                        if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
876                            pos += 1;
877                        }
878                        let content_indent = pos;
879                        return (&line[..pos], &line[pos..], content_indent);
880                    }
881                }
882            }
883        }
884
885        ("", line, 0)
886    }
887
888    /// Extract the table row content from a line, stripping any list/blockquote prefix.
889    ///
890    /// This is useful for processing table rows that may be inside list items or blockquotes.
891    /// The line_index indicates which line of the table this is (0 = header, 1 = delimiter, etc.)
892    pub fn extract_table_row_content<'a>(line: &'a str, table_block: &TableBlock, line_index: usize) -> &'a str {
893        // First strip blockquote prefix
894        let (_, after_blockquote) = Self::extract_blockquote_prefix(line);
895
896        // Then handle list prefix if present
897        if let Some(ref list_ctx) = table_block.list_context {
898            if line_index == 0 {
899                // Header line: strip list prefix (handles both markers and indentation)
900                after_blockquote
901                    .strip_prefix(&list_ctx.list_prefix)
902                    .unwrap_or_else(|| Self::extract_list_prefix(after_blockquote).1)
903            } else {
904                // Continuation lines: strip indentation
905                Self::strip_list_continuation_indent(after_blockquote, list_ctx.content_indent)
906            }
907        } else {
908            after_blockquote
909        }
910    }
911
912    /// Check if the content after a list marker looks like a table row.
913    /// This is used to detect tables that start on the same line as a list marker.
914    pub fn is_list_item_with_table_row(line: &str) -> bool {
915        let (prefix, content, _) = Self::extract_list_prefix(line);
916        if prefix.is_empty() {
917            return false;
918        }
919
920        // Check if the content after the list marker is a table row
921        // It must start with | (proper table format within a list)
922        let trimmed = content.trim();
923        if !trimmed.starts_with('|') {
924            return false;
925        }
926
927        // Use our table row detection on the content
928        Self::is_potential_table_row_content(content)
929    }
930
931    /// Internal helper: Check if content (without list/blockquote prefix) looks like a table row.
932    fn is_potential_table_row_content(content: &str) -> bool {
933        Self::is_potential_table_row(content)
934    }
935}
936
937#[cfg(test)]
938mod tests {
939    use super::*;
940    use crate::lint_context::LintContext;
941
942    #[test]
943    fn test_is_potential_table_row() {
944        // Basic valid table rows
945        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
946        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
947        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
948        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
949
950        // Multiple cells
951        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
952
953        // With whitespace
954        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
955        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
956
957        // Not table rows
958        assert!(!TableUtils::is_potential_table_row("- List item"));
959        assert!(!TableUtils::is_potential_table_row("* Another list"));
960        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
961        assert!(!TableUtils::is_potential_table_row("Regular text"));
962        assert!(!TableUtils::is_potential_table_row(""));
963        assert!(!TableUtils::is_potential_table_row("   "));
964
965        // Code blocks
966        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
967        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
968        assert!(!TableUtils::is_potential_table_row("Use ``a|b`` in prose"));
969        assert!(TableUtils::is_potential_table_row("| `fenced` | Uses ``` and ~~~ |"));
970        assert!(TableUtils::is_potential_table_row("`!foo && bar` | `(!foo) && bar`"));
971        assert!(!TableUtils::is_potential_table_row("`echo a | sed 's/a/b/'`"));
972
973        // Math spans: pipes inside $...$ are not table separators
974        assert!(!TableUtils::is_potential_table_row(
975            "Text with $|S|$ math notation here."
976        ));
977        assert!(!TableUtils::is_potential_table_row(
978            "Size $|S|$ was even, check $|T|$ too."
979        ));
980        assert!(!TableUtils::is_potential_table_row("Display $$|A| + |B|$$ math here."));
981        // Math pipe in cell with outer pipes is still a table row
982        assert!(TableUtils::is_potential_table_row("| cell with $|S|$ math |"));
983        // Pipe after fully closed math spans is still detected
984        assert!(TableUtils::is_potential_table_row("$a$ | $b$"));
985        assert!(TableUtils::is_potential_table_row("$f(x)$ and $g(x)$ | result"));
986        // $5 | $10 style price comparisons are suppressed as a deliberate trade-off:
987        // the leading $ opens a math span, consuming the pipe. Tables with bare dollar
988        // amounts should use outer pipes (| $5 | $10 |) to be correctly detected.
989        assert!(!TableUtils::is_potential_table_row("$5 | $10"));
990
991        // Single pipe not enough
992        assert!(!TableUtils::is_potential_table_row("Just one |"));
993        assert!(!TableUtils::is_potential_table_row("| Just one"));
994
995        // Very long cells are valid in tables (no length limit for cell content)
996        let long_cell = "a".repeat(150);
997        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
998
999        // Cells with newlines
1000        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
1001
1002        // Empty cells (Issue #129)
1003        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
1004        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
1005        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
1006    }
1007
1008    #[test]
1009    fn test_list_items_with_pipes_not_table_rows() {
1010        // Ordered list items should NOT be detected as table rows
1011        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
1012        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
1013        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
1014        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
1015        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
1016
1017        // Unordered list items with tabs
1018        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
1019        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
1020        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
1021
1022        // Indented list items (the trim_start normalizes indentation)
1023        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
1024        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
1025        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
1026
1027        // Task list items
1028        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
1029        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
1030
1031        // Multiple pipes in list items
1032        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
1033        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
1034
1035        // These SHOULD still be detected as potential table rows
1036        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
1037        assert!(TableUtils::is_potential_table_row("cell | cell"));
1038        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
1039    }
1040
1041    #[test]
1042    fn test_atx_headings_with_pipes_not_table_rows() {
1043        // All 6 ATX heading levels with pipes
1044        assert!(!TableUtils::is_potential_table_row("# Heading | with pipe"));
1045        assert!(!TableUtils::is_potential_table_row("## Heading | with pipe"));
1046        assert!(!TableUtils::is_potential_table_row("### Heading | with pipe"));
1047        assert!(!TableUtils::is_potential_table_row("#### Heading | with pipe"));
1048        assert!(!TableUtils::is_potential_table_row("##### Heading | with pipe"));
1049        assert!(!TableUtils::is_potential_table_row("###### Heading | with pipe"));
1050
1051        // Multiple pipes in headings
1052        assert!(!TableUtils::is_potential_table_row("### col1 | col2 | col3"));
1053        assert!(!TableUtils::is_potential_table_row("## a|b|c"));
1054
1055        // Headings with tab after hashes
1056        assert!(!TableUtils::is_potential_table_row("#\tHeading | pipe"));
1057        assert!(!TableUtils::is_potential_table_row("##\tHeading | pipe"));
1058
1059        // Heading with only hashes and pipe (empty heading text)
1060        assert!(!TableUtils::is_potential_table_row("# |"));
1061        assert!(!TableUtils::is_potential_table_row("## |"));
1062
1063        // Indented headings (spaces before #)
1064        assert!(!TableUtils::is_potential_table_row("  ## Heading | pipe"));
1065        assert!(!TableUtils::is_potential_table_row("   ### Heading | pipe"));
1066
1067        // Unicode content in headings (the original proptest failure case)
1068        assert!(!TableUtils::is_potential_table_row("#### ®aAA|ᯗ"));
1069
1070        // 7+ hashes are NOT headings — should follow normal table detection
1071        // "####### text|pipe" has no space after 7 hashes if treated as non-heading
1072        // but with a space it still has 7+ hashes so not a heading
1073        assert!(TableUtils::is_potential_table_row("####### text | pipe"));
1074
1075        // Hash without space is NOT a heading, so pipe detection applies
1076        assert!(TableUtils::is_potential_table_row("#nospc|pipe"));
1077
1078        // These SHOULD still be detected as potential table rows
1079        assert!(TableUtils::is_potential_table_row("| # Header | Value |"));
1080        assert!(TableUtils::is_potential_table_row("text | #tag"));
1081    }
1082
1083    #[test]
1084    fn test_is_delimiter_row() {
1085        // Basic delimiter rows
1086        assert!(TableUtils::is_delimiter_row("|---|---|"));
1087        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
1088        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
1089        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
1090
1091        // With varying dash counts
1092        assert!(TableUtils::is_delimiter_row("|-|--|"));
1093        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
1094
1095        // With whitespace
1096        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
1097        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
1098
1099        // Multiple columns
1100        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
1101
1102        // Without leading/trailing pipes
1103        assert!(TableUtils::is_delimiter_row("--- | ---"));
1104        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
1105
1106        // Not delimiter rows
1107        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
1108        assert!(!TableUtils::is_delimiter_row("Regular text"));
1109        assert!(!TableUtils::is_delimiter_row(""));
1110        assert!(!TableUtils::is_delimiter_row("|||"));
1111        assert!(!TableUtils::is_delimiter_row("| | |"));
1112
1113        // Must have dashes
1114        assert!(!TableUtils::is_delimiter_row("| : | : |"));
1115        assert!(!TableUtils::is_delimiter_row("|    |    |"));
1116
1117        // Mixed content
1118        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
1119        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
1120    }
1121
1122    #[test]
1123    fn test_count_cells() {
1124        // Basic counts
1125        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
1126        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
1127        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
1128        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
1129
1130        // Single cell
1131        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
1132        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
1133
1134        // Empty cells
1135        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
1136        assert_eq!(TableUtils::count_cells("| | | |"), 3);
1137
1138        // Many cells
1139        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
1140
1141        // Edge cases
1142        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
1143        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
1144
1145        // No table
1146        assert_eq!(TableUtils::count_cells("Regular text"), 0);
1147        assert_eq!(TableUtils::count_cells(""), 0);
1148        assert_eq!(TableUtils::count_cells("   "), 0);
1149
1150        // Whitespace handling
1151        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
1152        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
1153    }
1154
1155    #[test]
1156    fn test_count_cells_with_escaped_pipes() {
1157        // Pipes inside code spans are treated as content, not cell delimiters.
1158        // To include a literal pipe outside code spans, escape it with \|.
1159
1160        // Basic table structure
1161        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
1162        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
1163        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
1164
1165        // Escaped pipes: \| keeps the pipe as content
1166        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
1167        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
1168
1169        // Escaped pipes inside backticks
1170        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
1171
1172        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
1173        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
1174        // Double backslash inside backticks: pipe is still masked by code span
1175        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 2);
1176
1177        // Pipes inside code spans are content, not delimiters
1178        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 2);
1179        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 2);
1180        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 1);
1181
1182        // Regex example - pipes in code spans are masked
1183        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 2);
1184        // Escaped pipe inside code is also masked (escape is redundant here)
1185        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
1186    }
1187
1188    #[test]
1189    fn test_determine_pipe_style() {
1190        // All pipe styles
1191        assert_eq!(
1192            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
1193            Some("leading_and_trailing")
1194        );
1195        assert_eq!(
1196            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
1197            Some("leading_only")
1198        );
1199        assert_eq!(
1200            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
1201            Some("trailing_only")
1202        );
1203        assert_eq!(
1204            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
1205            Some("no_leading_or_trailing")
1206        );
1207
1208        // With whitespace
1209        assert_eq!(
1210            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
1211            Some("leading_and_trailing")
1212        );
1213        assert_eq!(
1214            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
1215            Some("leading_only")
1216        );
1217
1218        // No pipes
1219        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
1220        assert_eq!(TableUtils::determine_pipe_style(""), None);
1221        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1222
1223        // Single pipe cases
1224        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
1225        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
1226        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
1227    }
1228
1229    #[test]
1230    fn test_find_table_blocks_simple() {
1231        let content = "| Header 1 | Header 2 |
1232|-----------|-----------|
1233| Cell 1    | Cell 2    |
1234| Cell 3    | Cell 4    |";
1235
1236        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1237
1238        let tables = TableUtils::find_table_blocks(content, &ctx);
1239        assert_eq!(tables.len(), 1);
1240
1241        let table = &tables[0];
1242        assert_eq!(table.start_line, 0);
1243        assert_eq!(table.end_line, 3);
1244        assert_eq!(table.header_line, 0);
1245        assert_eq!(table.delimiter_line, 1);
1246        assert_eq!(table.content_lines, vec![2, 3]);
1247    }
1248
1249    #[test]
1250    fn test_find_table_blocks_multiple() {
1251        let content = "Some text
1252
1253| Table 1 | Col A |
1254|----------|-------|
1255| Data 1   | Val 1 |
1256
1257More text
1258
1259| Table 2 | Col 2 |
1260|----------|-------|
1261| Data 2   | Data  |";
1262
1263        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1264
1265        let tables = TableUtils::find_table_blocks(content, &ctx);
1266        assert_eq!(tables.len(), 2);
1267
1268        // First table
1269        assert_eq!(tables[0].start_line, 2);
1270        assert_eq!(tables[0].end_line, 4);
1271        assert_eq!(tables[0].header_line, 2);
1272        assert_eq!(tables[0].delimiter_line, 3);
1273        assert_eq!(tables[0].content_lines, vec![4]);
1274
1275        // Second table
1276        assert_eq!(tables[1].start_line, 8);
1277        assert_eq!(tables[1].end_line, 10);
1278        assert_eq!(tables[1].header_line, 8);
1279        assert_eq!(tables[1].delimiter_line, 9);
1280        assert_eq!(tables[1].content_lines, vec![10]);
1281    }
1282
1283    #[test]
1284    fn test_find_table_blocks_no_content_rows() {
1285        let content = "| Header 1 | Header 2 |
1286|-----------|-----------|
1287
1288Next paragraph";
1289
1290        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1291
1292        let tables = TableUtils::find_table_blocks(content, &ctx);
1293        assert_eq!(tables.len(), 1);
1294
1295        let table = &tables[0];
1296        assert_eq!(table.start_line, 0);
1297        assert_eq!(table.end_line, 1); // Just header and delimiter
1298        assert_eq!(table.content_lines.len(), 0);
1299    }
1300
1301    #[test]
1302    fn test_find_table_blocks_in_code_block() {
1303        let content = "```
1304| Not | A | Table |
1305|-----|---|-------|
1306| In  | Code | Block |
1307```
1308
1309| Real | Table |
1310|------|-------|
1311| Data | Here  |";
1312
1313        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1314
1315        let tables = TableUtils::find_table_blocks(content, &ctx);
1316        assert_eq!(tables.len(), 1); // Only the table outside code block
1317
1318        let table = &tables[0];
1319        assert_eq!(table.header_line, 6);
1320        assert_eq!(table.delimiter_line, 7);
1321    }
1322
1323    #[test]
1324    fn test_find_table_blocks_no_tables() {
1325        let content = "Just regular text
1326No tables here
1327- List item with | pipe
1328* Another list item";
1329
1330        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1331
1332        let tables = TableUtils::find_table_blocks(content, &ctx);
1333        assert_eq!(tables.len(), 0);
1334    }
1335
1336    #[test]
1337    fn test_find_table_blocks_malformed() {
1338        let content = "| Header without delimiter |
1339| This looks like table |
1340But no delimiter row
1341
1342| Proper | Table |
1343|---------|-------|
1344| Data    | Here  |";
1345
1346        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1347
1348        let tables = TableUtils::find_table_blocks(content, &ctx);
1349        assert_eq!(tables.len(), 1); // Only the proper table
1350        assert_eq!(tables[0].header_line, 4);
1351    }
1352
1353    #[test]
1354    fn test_edge_cases() {
1355        // Test empty content
1356        assert!(!TableUtils::is_potential_table_row(""));
1357        assert!(!TableUtils::is_delimiter_row(""));
1358        assert_eq!(TableUtils::count_cells(""), 0);
1359        assert_eq!(TableUtils::determine_pipe_style(""), None);
1360
1361        // Test whitespace only
1362        assert!(!TableUtils::is_potential_table_row("   "));
1363        assert!(!TableUtils::is_delimiter_row("   "));
1364        assert_eq!(TableUtils::count_cells("   "), 0);
1365        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1366
1367        // Test single character
1368        assert!(!TableUtils::is_potential_table_row("|"));
1369        assert!(!TableUtils::is_delimiter_row("|"));
1370        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
1371
1372        // Test very long lines are valid table rows (no length limit)
1373        // Test both single-column and multi-column long lines
1374        let long_single = format!("| {} |", "a".repeat(200));
1375        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
1376
1377        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
1378        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
1379
1380        // Test unicode
1381        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
1382        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
1383        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
1384    }
1385
1386    #[test]
1387    fn test_table_block_struct() {
1388        let block = TableBlock {
1389            start_line: 0,
1390            end_line: 5,
1391            header_line: 0,
1392            delimiter_line: 1,
1393            content_lines: vec![2, 3, 4, 5],
1394            list_context: None,
1395        };
1396
1397        // Test Debug trait
1398        let debug_str = format!("{block:?}");
1399        assert!(debug_str.contains("TableBlock"));
1400        assert!(debug_str.contains("start_line: 0"));
1401
1402        // Test Clone trait
1403        let cloned = block.clone();
1404        assert_eq!(cloned.start_line, block.start_line);
1405        assert_eq!(cloned.end_line, block.end_line);
1406        assert_eq!(cloned.header_line, block.header_line);
1407        assert_eq!(cloned.delimiter_line, block.delimiter_line);
1408        assert_eq!(cloned.content_lines, block.content_lines);
1409        assert!(cloned.list_context.is_none());
1410    }
1411
1412    #[test]
1413    fn test_split_table_row() {
1414        // Basic split
1415        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
1416        assert_eq!(cells.len(), 3);
1417        assert_eq!(cells[0].trim(), "Cell 1");
1418        assert_eq!(cells[1].trim(), "Cell 2");
1419        assert_eq!(cells[2].trim(), "Cell 3");
1420
1421        // Without trailing pipe
1422        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
1423        assert_eq!(cells.len(), 2);
1424
1425        // Empty cells
1426        let cells = TableUtils::split_table_row("| | | |");
1427        assert_eq!(cells.len(), 3);
1428
1429        // Single cell
1430        let cells = TableUtils::split_table_row("| Cell |");
1431        assert_eq!(cells.len(), 1);
1432        assert_eq!(cells[0].trim(), "Cell");
1433
1434        // No pipes
1435        let cells = TableUtils::split_table_row("No pipes here");
1436        assert_eq!(cells.len(), 0);
1437    }
1438
1439    #[test]
1440    fn test_split_table_row_with_escaped_pipes() {
1441        // Escaped pipes should be preserved in cell content
1442        let cells = TableUtils::split_table_row(r"| A | B \| C |");
1443        assert_eq!(cells.len(), 2);
1444        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
1445
1446        // Double backslash + pipe is NOT escaped
1447        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
1448        assert_eq!(cells.len(), 3);
1449    }
1450
1451    #[test]
1452    fn test_split_table_row_with_flavor_mkdocs() {
1453        // MkDocs flavor: pipes in inline code are NOT cell delimiters
1454        let cells =
1455            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
1456        assert_eq!(cells.len(), 2);
1457        assert!(
1458            cells[1].contains("`x | y`"),
1459            "Inline code with pipe should be single cell in MkDocs flavor"
1460        );
1461
1462        // Multiple pipes in inline code
1463        let cells =
1464            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
1465        assert_eq!(cells.len(), 2);
1466        assert!(cells[1].contains("`a | b | c`"));
1467    }
1468
1469    #[test]
1470    fn test_split_table_row_with_flavor_standard() {
1471        // Pipes in inline code are NOT cell delimiters for any flavor
1472        let cells =
1473            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
1474        assert_eq!(
1475            cells.len(),
1476            2,
1477            "Pipes in code spans should not be cell delimiters, got {cells:?}"
1478        );
1479        assert!(
1480            cells[1].contains("`x | y`"),
1481            "Inline code with pipe should be single cell"
1482        );
1483    }
1484
1485    // === extract_blockquote_prefix tests ===
1486
1487    #[test]
1488    fn test_extract_blockquote_prefix_no_blockquote() {
1489        // Regular table row without blockquote
1490        let (prefix, content) = TableUtils::extract_blockquote_prefix("| H1 | H2 |");
1491        assert_eq!(prefix, "");
1492        assert_eq!(content, "| H1 | H2 |");
1493    }
1494
1495    #[test]
1496    fn test_extract_blockquote_prefix_single_level() {
1497        // Single blockquote level
1498        let (prefix, content) = TableUtils::extract_blockquote_prefix("> | H1 | H2 |");
1499        assert_eq!(prefix, "> ");
1500        assert_eq!(content, "| H1 | H2 |");
1501    }
1502
1503    #[test]
1504    fn test_extract_blockquote_prefix_double_level() {
1505        // Double blockquote level
1506        let (prefix, content) = TableUtils::extract_blockquote_prefix(">> | H1 | H2 |");
1507        assert_eq!(prefix, ">> ");
1508        assert_eq!(content, "| H1 | H2 |");
1509    }
1510
1511    #[test]
1512    fn test_extract_blockquote_prefix_triple_level() {
1513        // Triple blockquote level
1514        let (prefix, content) = TableUtils::extract_blockquote_prefix(">>> | H1 | H2 |");
1515        assert_eq!(prefix, ">>> ");
1516        assert_eq!(content, "| H1 | H2 |");
1517    }
1518
1519    #[test]
1520    fn test_extract_blockquote_prefix_with_spaces() {
1521        // Blockquote with spaces between markers
1522        let (prefix, content) = TableUtils::extract_blockquote_prefix("> > | H1 | H2 |");
1523        assert_eq!(prefix, "> > ");
1524        assert_eq!(content, "| H1 | H2 |");
1525    }
1526
1527    #[test]
1528    fn test_extract_blockquote_prefix_indented() {
1529        // Indented blockquote
1530        let (prefix, content) = TableUtils::extract_blockquote_prefix("  > | H1 | H2 |");
1531        assert_eq!(prefix, "  > ");
1532        assert_eq!(content, "| H1 | H2 |");
1533    }
1534
1535    #[test]
1536    fn test_extract_blockquote_prefix_no_space_after() {
1537        // Blockquote without space after marker
1538        let (prefix, content) = TableUtils::extract_blockquote_prefix(">| H1 | H2 |");
1539        assert_eq!(prefix, ">");
1540        assert_eq!(content, "| H1 | H2 |");
1541    }
1542
1543    #[test]
1544    fn test_determine_pipe_style_in_blockquote() {
1545        // determine_pipe_style should handle blockquotes correctly
1546        assert_eq!(
1547            TableUtils::determine_pipe_style("> | H1 | H2 |"),
1548            Some("leading_and_trailing")
1549        );
1550        assert_eq!(
1551            TableUtils::determine_pipe_style("> H1 | H2"),
1552            Some("no_leading_or_trailing")
1553        );
1554        assert_eq!(
1555            TableUtils::determine_pipe_style(">> | H1 | H2 |"),
1556            Some("leading_and_trailing")
1557        );
1558        assert_eq!(TableUtils::determine_pipe_style(">>> | H1 | H2"), Some("leading_only"));
1559    }
1560
1561    #[test]
1562    fn test_list_table_delimiter_requires_indentation() {
1563        // Test case: list item contains pipe, but delimiter line is at column 1
1564        // This should NOT be detected as a list table since the delimiter has no indentation.
1565        // The result is a non-list table starting at line 0 (the list item becomes the header)
1566        // but list_context should be None.
1567        let content = "- List item with | pipe\n|---|---|\n| Cell 1 | Cell 2 |";
1568        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1569        let tables = TableUtils::find_table_blocks(content, &ctx);
1570
1571        // The table will be detected starting at line 0, but crucially it should NOT have
1572        // list_context set, meaning it won't be treated as a list-table for column count purposes
1573        assert_eq!(tables.len(), 1, "Should find exactly one table");
1574        assert!(
1575            tables[0].list_context.is_none(),
1576            "Should NOT have list context since delimiter has no indentation"
1577        );
1578    }
1579
1580    #[test]
1581    fn test_list_table_with_properly_indented_delimiter() {
1582        // Test case: list item with table header, delimiter properly indented
1583        // This SHOULD be detected as a list table
1584        let content = "- | Header 1 | Header 2 |\n  |----------|----------|\n  | Cell 1   | Cell 2   |";
1585        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1586        let tables = TableUtils::find_table_blocks(content, &ctx);
1587
1588        // Should find exactly one list-table starting at line 0
1589        assert_eq!(tables.len(), 1, "Should find exactly one table");
1590        assert_eq!(tables[0].start_line, 0, "Table should start at list item line");
1591        assert!(
1592            tables[0].list_context.is_some(),
1593            "Should be a list table since delimiter is properly indented"
1594        );
1595    }
1596
1597    #[test]
1598    fn test_mask_pipes_in_inline_code_regular_backticks() {
1599        // Regular backtick code span: pipe should be masked
1600        let result = TableUtils::mask_pipes_in_inline_code("| `code | here` |");
1601        assert_eq!(result, "| `code _ here` |");
1602    }
1603
1604    #[test]
1605    fn test_mask_pipes_in_inline_code_escaped_backtick_not_code_span() {
1606        // Escaped backtick (\`) is literal text, not a code span opener.
1607        // The pipe should NOT be masked.
1608        let result = TableUtils::mask_pipes_in_inline_code(r"| \`not code | still pipe\` |");
1609        assert_eq!(result, r"| \`not code | still pipe\` |");
1610    }
1611
1612    #[test]
1613    fn test_mask_pipes_in_inline_code_escaped_backslash_then_backtick() {
1614        // Escaped backslash (\\) followed by backtick: the backtick IS a code span opener.
1615        // The pipe inside the code span SHOULD be masked.
1616        let result = TableUtils::mask_pipes_in_inline_code(r"| \\`real code | masked\\` |");
1617        // \\` = escaped backslash + real backtick (code span opener)
1618        // The pipe between the backticks should be masked
1619        assert_eq!(result, r"| \\`real code _ masked\\` |");
1620    }
1621
1622    #[test]
1623    fn test_mask_pipes_in_inline_code_triple_backslash_before_backtick() {
1624        // Three backslashes before backtick: odd count means backtick is escaped
1625        let result = TableUtils::mask_pipes_in_inline_code(r"| \\\`not code | pipe\\\` |");
1626        assert_eq!(result, r"| \\\`not code | pipe\\\` |");
1627    }
1628
1629    #[test]
1630    fn test_mask_pipes_in_inline_code_four_backslashes_before_backtick() {
1631        // Four backslashes before backtick: even count means backtick is a real delimiter
1632        let result = TableUtils::mask_pipes_in_inline_code(r"| \\\\`code | here\\\\` |");
1633        assert_eq!(result, r"| \\\\`code _ here\\\\` |");
1634    }
1635
1636    #[test]
1637    fn test_mask_pipes_in_inline_code_no_backslash() {
1638        // No backslashes at all: standard behavior, pipe inside code span is masked
1639        let result = TableUtils::mask_pipes_in_inline_code("before `a | b` after");
1640        assert_eq!(result, "before `a _ b` after");
1641    }
1642
1643    #[test]
1644    fn test_mask_pipes_in_inline_code_no_code_span() {
1645        // No backticks at all: nothing should be masked
1646        let result = TableUtils::mask_pipes_in_inline_code("| col1 | col2 |");
1647        assert_eq!(result, "| col1 | col2 |");
1648    }
1649
1650    #[test]
1651    fn test_mask_pipes_in_inline_code_backslash_before_closing_backtick() {
1652        // Per CommonMark spec, backslash escapes do NOT work inside code spans.
1653        // Inside a code span, `\` is a literal character. So `foo\` is a valid
1654        // code span containing "foo\", and the closing backtick is NOT escaped.
1655        //
1656        // Input: | `foo\` | bar |
1657        // The code span is `foo\` (backtick opens, backslash is literal, backtick closes).
1658        // The pipe after the code span is a real delimiter, producing 2 cells.
1659        // The pipe inside the code span should be left alone (there isn't one here).
1660        let result = TableUtils::mask_pipes_in_inline_code(r"| `foo\` | bar |");
1661        // The backslash before closing backtick is literal inside the code span,
1662        // so the code span closes at that backtick. The pipe between cells is NOT masked.
1663        assert_eq!(result, r"| `foo\` | bar |");
1664    }
1665
1666    #[test]
1667    fn test_mask_pipes_in_inline_code_backslash_literal_with_pipe_inside() {
1668        // Code span contains a backslash and a pipe: `a\|b`
1669        // The backslash is literal inside the code span (CommonMark spec).
1670        // The pipe is inside the code span, so it should be masked.
1671        let result = TableUtils::mask_pipes_in_inline_code(r"| `a\|b` | col2 |");
1672        assert_eq!(result, r"| `a\_b` | col2 |");
1673    }
1674
1675    #[test]
1676    fn test_count_preceding_backslashes() {
1677        let chars: Vec<char> = r"abc\\\`def".chars().collect();
1678        // Position of backtick is at index 6 (a=0, b=1, c=2, \=3, \=4, \=5, `=6)
1679        assert_eq!(TableUtils::count_preceding_backslashes(&chars, 6), 3);
1680
1681        let chars2: Vec<char> = r"abc\\`def".chars().collect();
1682        // Position of backtick is at index 5
1683        assert_eq!(TableUtils::count_preceding_backslashes(&chars2, 5), 2);
1684
1685        let chars3: Vec<char> = "`def".chars().collect();
1686        // Position of backtick is at index 0 -- no preceding chars
1687        assert_eq!(TableUtils::count_preceding_backslashes(&chars3, 0), 0);
1688    }
1689
1690    #[test]
1691    fn test_has_unescaped_pipe_backslash_literal_in_code_span() {
1692        // Per CommonMark: backslashes are literal inside code spans.
1693        // `foo\` is a complete code span, so the pipe after it is outside code.
1694        assert!(TableUtils::has_unescaped_pipe_outside_spans(r"`foo\` | bar"));
1695
1696        // Escaped backtick outside code span: \` is not a code span opener
1697        assert!(TableUtils::has_unescaped_pipe_outside_spans(r"\`foo | bar\`"));
1698
1699        // Pipe inside code span should not count
1700        assert!(!TableUtils::has_unescaped_pipe_outside_spans(r"`foo | bar`"));
1701    }
1702
1703    #[test]
1704    fn test_table_after_code_span_detected() {
1705        use crate::config::MarkdownFlavor;
1706
1707        let content = "`code`\n\n| A | B |\n|---|---|\n| 1 | 2 |\n";
1708        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1709        assert!(!ctx.table_blocks.is_empty(), "Table after code span should be detected");
1710    }
1711
1712    #[test]
1713    fn test_table_inside_html_comment_not_detected() {
1714        use crate::config::MarkdownFlavor;
1715
1716        let content = "<!--\n| A | B |\n|---|---|\n| 1 | 2 |\n-->\n";
1717        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1718        assert!(
1719            ctx.table_blocks.is_empty(),
1720            "Table inside HTML comment should not be detected"
1721        );
1722    }
1723}