Skip to main content

rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).

/// Represents a table block in the document.
///
/// Every line field is a zero-based index into the lines of the scanned
/// content (the order produced by `str::lines`).
#[derive(Debug, Clone)]
pub struct TableBlock {
    /// Index of the first line of the table (the header row).
    pub start_line: usize,
    /// Index of the last line of the table, inclusive. For a table without
    /// body rows this is the delimiter row.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row (the line directly after the header).
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in document order.
    pub content_lines: Vec<usize>,
    /// If the table is inside a list item, this contains:
    /// - The list marker prefix for the header line (e.g., "- ", "1. ")
    /// - The content indent (number of spaces for continuation lines)
    ///
    /// `None` for tables that are not part of a list item.
    pub list_context: Option<ListTableContext>,
}
18
/// Context information for tables inside list items.
///
/// Attached to a [`TableBlock`] when its header row either shares a line with
/// a list marker or is indented under an active list item.
#[derive(Debug, Clone)]
pub struct ListTableContext {
    /// The list marker prefix including any leading whitespace (e.g., "- ", "  1. ").
    ///
    /// For a table that starts on a continuation line (no marker on the header
    /// row) this is a run of `content_indent` spaces instead of a marker.
    pub list_prefix: String,
    /// Number of spaces for continuation lines to align with content
    pub content_indent: usize,
}
27
/// Shared table detection utilities.
///
/// A field-less unit struct used purely as a namespace: the helpers are
/// associated functions invoked as `TableUtils::name(...)`.
pub struct TableUtils;
30
31impl TableUtils {
32    /// Returns true if the line has at least one unescaped pipe separator outside inline code spans.
33    ///
34    /// This helps distinguish actual table separators from command/prose examples like
35    /// `` `echo a | sed 's/a/b/'` `` where the pipe is fully inside inline code.
36    fn has_unescaped_pipe_outside_inline_code(text: &str) -> bool {
37        let chars: Vec<char> = text.chars().collect();
38        let mut i = 0;
39        let mut in_code = false;
40        let mut code_delim_len = 0usize;
41
42        while i < chars.len() {
43            let ch = chars[i];
44
45            if ch == '\\' && !in_code {
46                // Skip escaped character (only outside code spans —
47                // backslashes are literal inside code spans per CommonMark).
48                i += if i + 1 < chars.len() { 2 } else { 1 };
49                continue;
50            }
51
52            if ch == '`' {
53                let mut run = 1usize;
54                while i + run < chars.len() && chars[i + run] == '`' {
55                    run += 1;
56                }
57
58                if in_code {
59                    if run == code_delim_len {
60                        in_code = false;
61                        code_delim_len = 0;
62                    }
63                } else {
64                    in_code = true;
65                    code_delim_len = run;
66                }
67
68                i += run;
69                continue;
70            }
71
72            if ch == '|' && !in_code {
73                return true;
74            }
75
76            i += 1;
77        }
78
79        false
80    }
81
82    /// Check if a line looks like a potential table row
83    pub fn is_potential_table_row(line: &str) -> bool {
84        let trimmed = line.trim();
85        if trimmed.is_empty() || !trimmed.contains('|') {
86            return false;
87        }
88
89        // Skip lines that are clearly not table rows
90        // Unordered list items with space or tab after marker
91        if trimmed.starts_with("- ")
92            || trimmed.starts_with("* ")
93            || trimmed.starts_with("+ ")
94            || trimmed.starts_with("-\t")
95            || trimmed.starts_with("*\t")
96            || trimmed.starts_with("+\t")
97        {
98            return false;
99        }
100
101        // Skip ordered list items: digits followed by . or ) then space/tab
102        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
103            && first_non_digit > 0
104        {
105            let after_digits = &trimmed[first_non_digit..];
106            if after_digits.starts_with(". ")
107                || after_digits.starts_with(".\t")
108                || after_digits.starts_with(") ")
109                || after_digits.starts_with(")\t")
110            {
111                return false;
112            }
113        }
114
115        // Skip ATX headings (# through ######)
116        if trimmed.starts_with('#') {
117            let hash_count = trimmed.bytes().take_while(|&b| b == b'#').count();
118            if hash_count <= 6 {
119                let after_hashes = &trimmed[hash_count..];
120                if after_hashes.is_empty() || after_hashes.starts_with(' ') || after_hashes.starts_with('\t') {
121                    return false;
122                }
123            }
124        }
125
126        // For rows without explicit outer pipes, require a real separator outside
127        // inline code spans to avoid prose/command false positives.
128        let has_outer_pipes = trimmed.starts_with('|') && trimmed.ends_with('|');
129        if !has_outer_pipes && !Self::has_unescaped_pipe_outside_inline_code(trimmed) {
130            return false;
131        }
132
133        // Must have at least 2 parts when split by |
134        let parts: Vec<&str> = trimmed.split('|').collect();
135        if parts.len() < 2 {
136            return false;
137        }
138
139        // Check if it looks like a table row by having reasonable content between pipes
140        let mut valid_parts = 0;
141        let mut total_non_empty_parts = 0;
142
143        for part in &parts {
144            let part_trimmed = part.trim();
145            // Skip empty parts (from leading/trailing pipes)
146            if part_trimmed.is_empty() {
147                continue;
148            }
149            total_non_empty_parts += 1;
150
151            // Count parts that look like table cells (reasonable content, no newlines)
152            if !part_trimmed.contains('\n') {
153                valid_parts += 1;
154            }
155        }
156
157        // Check if all non-empty parts are valid (no newlines)
158        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
159            // Some cells contain newlines, not a valid table row
160            return false;
161        }
162
163        // GFM allows tables with all empty cells (e.g., |||)
164        // These are valid if they have proper table formatting (leading and trailing pipes)
165        if total_non_empty_parts == 0 {
166            // Empty cells are only valid with proper pipe formatting
167            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
168        }
169
170        // GFM allows single-column tables, so >= 1 valid part is enough
171        // when the line has proper table formatting (pipes)
172        if trimmed.starts_with('|') && trimmed.ends_with('|') {
173            // Properly formatted table row with pipes on both ends
174            valid_parts >= 1
175        } else {
176            // For rows without proper pipe formatting, require at least 2 cells
177            valid_parts >= 2
178        }
179    }
180
181    /// Check if a line is a table delimiter row (e.g., |---|---|)
182    pub fn is_delimiter_row(line: &str) -> bool {
183        let trimmed = line.trim();
184        if !trimmed.contains('|') || !trimmed.contains('-') {
185            return false;
186        }
187
188        // Split by pipes and check each part
189        let parts: Vec<&str> = trimmed.split('|').collect();
190        let mut valid_delimiter_parts = 0;
191        let mut total_non_empty_parts = 0;
192
193        for part in &parts {
194            let part_trimmed = part.trim();
195            if part_trimmed.is_empty() {
196                continue; // Skip empty parts from leading/trailing pipes
197            }
198
199            total_non_empty_parts += 1;
200
201            // Check if this part looks like a delimiter (contains dashes and optionally colons)
202            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
203                valid_delimiter_parts += 1;
204            }
205        }
206
207        // All non-empty parts must be valid delimiters, and there must be at least one
208        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
209    }
210
211    /// Strip blockquote prefix from a line, returning the content without the prefix
212    fn strip_blockquote_prefix(line: &str) -> &str {
213        let trimmed = line.trim_start();
214        if trimmed.starts_with('>') {
215            // Strip all blockquote markers and following space
216            let mut rest = trimmed;
217            while rest.starts_with('>') {
218                rest = rest.strip_prefix('>').unwrap_or(rest);
219                rest = rest.trim_start_matches(' ');
220            }
221            rest
222        } else {
223            line
224        }
225    }
226
227    /// Find all table blocks in the content with optimized detection
228    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
229    pub fn find_table_blocks_with_code_info(
230        content: &str,
231        code_blocks: &[(usize, usize)],
232        code_spans: &[crate::lint_context::CodeSpan],
233        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
234    ) -> Vec<TableBlock> {
235        let lines: Vec<&str> = content.lines().collect();
236        let mut tables = Vec::new();
237        let mut i = 0;
238
239        // Pre-compute line positions for efficient code block checking
240        let mut line_positions = Vec::with_capacity(lines.len());
241        let mut pos = 0;
242        for line in &lines {
243            line_positions.push(pos);
244            pos += line.len() + 1; // +1 for newline
245        }
246
247        // Stack of active list content indents for continuation table tracking.
248        // Supports nested lists: when a child list is seen, we push; when we
249        // dedent past a level, we pop back to the enclosing list.
250        let mut list_indent_stack: Vec<usize> = Vec::new();
251
252        while i < lines.len() {
253            // Skip lines in code blocks, code spans, or HTML comments
254            let line_start = line_positions[i];
255            let in_code =
256                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
257                    || code_spans
258                        .iter()
259                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
260            let in_html_comment = html_comment_ranges
261                .iter()
262                .any(|range| line_start >= range.start && line_start < range.end);
263
264            if in_code || in_html_comment {
265                i += 1;
266                continue;
267            }
268
269            // Strip blockquote prefix for table detection
270            let line_content = Self::strip_blockquote_prefix(lines[i]);
271
272            // Update active list tracking
273            let (list_prefix, list_content, content_indent) = Self::extract_list_prefix(line_content);
274            if !list_prefix.is_empty() {
275                // Line has a list marker. Pop any deeper/equal levels, then push this one.
276                while list_indent_stack.last().is_some_and(|&top| top >= content_indent) {
277                    list_indent_stack.pop();
278                }
279                list_indent_stack.push(content_indent);
280            } else if !line_content.trim().is_empty() {
281                // Non-blank line without a marker: pop any levels we've dedented past
282                let leading = line_content.len() - line_content.trim_start().len();
283                while list_indent_stack.last().is_some_and(|&top| leading < top) {
284                    list_indent_stack.pop();
285                }
286            }
287            // Blank lines keep the stack unchanged (blank lines don't end list items)
288
289            // Check if this is a list item that contains a table row on the same line,
290            // or a continuation table indented under an active list item
291            let (is_same_line_list_table, effective_content) =
292                if !list_prefix.is_empty() && Self::is_potential_table_row_content(list_content) {
293                    (true, list_content)
294                } else {
295                    (false, line_content)
296                };
297
298            // Detect continuation list tables: no marker on this line, but indented
299            // under an active list item (e.g., "- Text\n  | h1 | h2 |")
300            let continuation_indent = if !is_same_line_list_table && list_prefix.is_empty() {
301                let leading = line_content.len() - line_content.trim_start().len();
302                // Find the deepest list level this line is indented under
303                list_indent_stack
304                    .iter()
305                    .rev()
306                    .find(|&&indent| leading >= indent)
307                    .copied()
308            } else {
309                None
310            };
311
312            let is_continuation_list_table = continuation_indent.is_some()
313                && {
314                    let indent = continuation_indent.unwrap();
315                    let leading = line_content.len() - line_content.trim_start().len();
316                    // Per CommonMark, 4+ spaces beyond content indent is a code block
317                    leading < indent + 4
318                }
319                && Self::is_potential_table_row(effective_content);
320
321            let is_any_list_table = is_same_line_list_table || is_continuation_list_table;
322
323            // For continuation list tables, use the matched list indent
324            let effective_content_indent = if is_same_line_list_table {
325                content_indent
326            } else if is_continuation_list_table {
327                continuation_indent.unwrap()
328            } else {
329                0
330            };
331
332            // Look for potential table start
333            if is_any_list_table || Self::is_potential_table_row(effective_content) {
334                // For list tables (same-line or continuation), check indented continuation lines
335                // For regular tables, check the next line directly
336                let (next_line_content, delimiter_has_valid_indent) = if i + 1 < lines.len() {
337                    let next_raw = Self::strip_blockquote_prefix(lines[i + 1]);
338                    if is_any_list_table {
339                        // Verify the delimiter line has proper indentation
340                        let leading_spaces = next_raw.len() - next_raw.trim_start().len();
341                        if leading_spaces >= effective_content_indent {
342                            // Has proper indentation, strip it and check as delimiter
343                            (
344                                Self::strip_list_continuation_indent(next_raw, effective_content_indent),
345                                true,
346                            )
347                        } else {
348                            // Not enough indentation - not a list table
349                            (next_raw, false)
350                        }
351                    } else {
352                        (next_raw, true)
353                    }
354                } else {
355                    ("", true)
356                };
357
358                // For list tables, only accept if delimiter has valid indentation
359                let effective_is_list_table = is_any_list_table && delimiter_has_valid_indent;
360
361                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
362                    // Found a table! Find its end
363                    let table_start = i;
364                    let header_line = i;
365                    let delimiter_line = i + 1;
366                    let mut table_end = i + 1; // Include the delimiter row
367                    let mut content_lines = Vec::new();
368
369                    // Continue while we have table rows
370                    let mut j = i + 2;
371                    while j < lines.len() {
372                        let line = lines[j];
373                        // Strip blockquote prefix for checking
374                        let raw_content = Self::strip_blockquote_prefix(line);
375
376                        // For list tables, strip expected indentation
377                        let line_content = if effective_is_list_table {
378                            Self::strip_list_continuation_indent(raw_content, effective_content_indent)
379                        } else {
380                            raw_content
381                        };
382
383                        if line_content.trim().is_empty() {
384                            // Empty line ends the table
385                            break;
386                        }
387
388                        // For list tables, the continuation line must have proper indentation
389                        if effective_is_list_table {
390                            let leading_spaces = raw_content.len() - raw_content.trim_start().len();
391                            if leading_spaces < effective_content_indent {
392                                // Not enough indentation - end of table
393                                break;
394                            }
395                        }
396
397                        if Self::is_potential_table_row(line_content) {
398                            content_lines.push(j);
399                            table_end = j;
400                            j += 1;
401                        } else {
402                            // Non-table line ends the table
403                            break;
404                        }
405                    }
406
407                    let list_context = if effective_is_list_table {
408                        if is_same_line_list_table {
409                            // Same-line: prefix is the actual list marker (e.g., "- ")
410                            Some(ListTableContext {
411                                list_prefix: list_prefix.to_string(),
412                                content_indent: effective_content_indent,
413                            })
414                        } else {
415                            // Continuation: prefix is the indentation spaces
416                            Some(ListTableContext {
417                                list_prefix: " ".repeat(effective_content_indent),
418                                content_indent: effective_content_indent,
419                            })
420                        }
421                    } else {
422                        None
423                    };
424
425                    tables.push(TableBlock {
426                        start_line: table_start,
427                        end_line: table_end,
428                        header_line,
429                        delimiter_line,
430                        content_lines,
431                        list_context,
432                    });
433                    i = table_end + 1;
434                } else {
435                    i += 1;
436                }
437            } else {
438                i += 1;
439            }
440        }
441
442        tables
443    }
444
445    /// Strip list continuation indentation from a line.
446    /// For lines that are continuations of a list item's content, strip the expected indent.
447    fn strip_list_continuation_indent(line: &str, expected_indent: usize) -> &str {
448        let bytes = line.as_bytes();
449        let mut spaces = 0;
450
451        for &b in bytes {
452            if b == b' ' {
453                spaces += 1;
454            } else if b == b'\t' {
455                // Tab counts as up to 4 spaces, rounding up to next multiple of 4
456                spaces = (spaces / 4 + 1) * 4;
457            } else {
458                break;
459            }
460
461            if spaces >= expected_indent {
462                break;
463            }
464        }
465
466        // Strip at most expected_indent characters
467        let strip_count = spaces.min(expected_indent).min(line.len());
468        // Count actual bytes to strip (handling tabs)
469        let mut byte_count = 0;
470        let mut counted_spaces = 0;
471        for &b in bytes {
472            if counted_spaces >= strip_count {
473                break;
474            }
475            if b == b' ' {
476                counted_spaces += 1;
477                byte_count += 1;
478            } else if b == b'\t' {
479                counted_spaces = (counted_spaces / 4 + 1) * 4;
480                byte_count += 1;
481            } else {
482                break;
483            }
484        }
485
486        &line[byte_count..]
487    }
488
    /// Find all table blocks in the content with optimized detection
    /// This is a backward-compatible wrapper that accepts LintContext
    ///
    /// Forwards `content` to `find_table_blocks_with_code_info` together with
    /// the code blocks, code spans and HTML comment ranges taken from `ctx`.
    /// NOTE(review): presumably `content` is the same text `ctx` was built
    /// from, since those ranges are compared against offsets into `content` —
    /// confirm against callers.
    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
    }
494
    /// Count the number of cells in a table row
    ///
    /// Convenience wrapper around `count_cells_with_flavor` using
    /// `MarkdownFlavor::Standard`.
    pub fn count_cells(row: &str) -> usize {
        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
    }
499
500    /// Count the number of cells in a table row with flavor-specific behavior
501    ///
502    /// Pipes inside code spans are treated as content, not cell delimiters.
503    ///
504    /// This function strips blockquote prefixes before counting cells, so it works
505    /// correctly for tables inside blockquotes.
506    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
507        // Strip blockquote prefix if present before counting cells
508        let (_, content) = Self::extract_blockquote_prefix(row);
509        Self::split_table_row_with_flavor(content, flavor).len()
510    }
511
512    /// Count the number of consecutive backslashes immediately preceding `pos` in `chars`.
513    fn count_preceding_backslashes(chars: &[char], pos: usize) -> usize {
514        let mut count = 0;
515        let mut k = pos;
516        while k > 0 {
517            k -= 1;
518            if chars[k] == '\\' {
519                count += 1;
520            } else {
521                break;
522            }
523        }
524        count
525    }
526
527    /// Mask pipes inside inline code blocks with a placeholder character.
528    ///
529    /// Backticks preceded by an odd number of backslashes are escaped (literal text)
530    /// and do not open or close code spans. An even number of backslashes means the
531    /// backslashes themselves are escaped, so the backtick is a real delimiter.
532    pub fn mask_pipes_in_inline_code(text: &str) -> String {
533        let mut result = String::new();
534        let chars: Vec<char> = text.chars().collect();
535        let mut i = 0;
536
537        while i < chars.len() {
538            if chars[i] == '`' {
539                // A backtick preceded by an odd number of backslashes is escaped
540                let preceding = Self::count_preceding_backslashes(&chars, i);
541                if preceding % 2 != 0 {
542                    // Escaped backtick -- treat as literal text, not a code span opener
543                    result.push(chars[i]);
544                    i += 1;
545                    continue;
546                }
547
548                // Count consecutive backticks at start
549                let start = i;
550                let mut backtick_count = 0;
551                while i < chars.len() && chars[i] == '`' {
552                    backtick_count += 1;
553                    i += 1;
554                }
555
556                // Look for matching closing backticks
557                let mut found_closing = false;
558                let mut j = i;
559
560                while j < chars.len() {
561                    if chars[j] == '`' {
562                        // Per CommonMark spec, backslash escapes do NOT work inside code
563                        // spans -- all characters including backslashes are literal. So we
564                        // do NOT check count_preceding_backslashes here (only for the
565                        // opening backtick above).
566
567                        // Count potential closing backticks
568                        let close_start = j;
569                        let mut close_count = 0;
570                        while j < chars.len() && chars[j] == '`' {
571                            close_count += 1;
572                            j += 1;
573                        }
574
575                        if close_count == backtick_count {
576                            // Found matching closing backticks
577                            found_closing = true;
578
579                            // Valid inline code - add with pipes masked
580                            result.extend(chars[start..i].iter());
581
582                            for &ch in chars.iter().take(close_start).skip(i) {
583                                if ch == '|' {
584                                    result.push('_'); // Mask pipe with underscore
585                                } else {
586                                    result.push(ch);
587                                }
588                            }
589
590                            result.extend(chars[close_start..j].iter());
591                            i = j;
592                            break;
593                        }
594                        // If not matching, continue searching (j is already past these backticks)
595                    } else {
596                        j += 1;
597                    }
598                }
599
600                if !found_closing {
601                    // No matching closing found, treat as regular text
602                    result.extend(chars[start..i].iter());
603                }
604            } else {
605                result.push(chars[i]);
606                i += 1;
607            }
608        }
609
610        result
611    }
612
613    /// Mask escaped pipes for accurate table cell parsing
614    ///
615    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
616    /// - `\|` → escaped pipe → masked (stays as cell content)
617    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
618    ///
619    /// This function only handles escaped pipes. Pipes inside inline code spans
620    /// are handled separately by `mask_pipes_in_inline_code`.
621    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
622        let mut result = String::new();
623        let chars: Vec<char> = text.chars().collect();
624        let mut i = 0;
625
626        while i < chars.len() {
627            if chars[i] == '\\' {
628                if i + 1 < chars.len() && chars[i + 1] == '\\' {
629                    // Escaped backslash: \\ → push both and continue
630                    // The next character (if it's a pipe) will be a real delimiter
631                    result.push('\\');
632                    result.push('\\');
633                    i += 2;
634                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
635                    // Escaped pipe: \| → mask the pipe
636                    result.push('\\');
637                    result.push('_'); // Mask the pipe
638                    i += 2;
639                } else {
640                    // Single backslash not followed by \ or | → just push it
641                    result.push(chars[i]);
642                    i += 1;
643                }
644            } else {
645                result.push(chars[i]);
646                i += 1;
647            }
648        }
649
650        result
651    }
652
653    /// Split a table row into individual cell contents with flavor-specific behavior.
654    ///
655    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
656    /// This is the foundation for both cell counting and cell content extraction.
657    ///
658    /// Pipes inside code spans are treated as content, not cell delimiters.
659    pub fn split_table_row_with_flavor(row: &str, _flavor: crate::config::MarkdownFlavor) -> Vec<String> {
660        let trimmed = row.trim();
661
662        if !trimmed.contains('|') {
663            return Vec::new();
664        }
665
666        // First, mask escaped pipes (same for all flavors)
667        let masked = Self::mask_pipes_for_table_parsing(trimmed);
668
669        // Mask pipes inside inline code for all flavors
670        let final_masked = Self::mask_pipes_in_inline_code(&masked);
671
672        let has_leading = final_masked.starts_with('|');
673        let has_trailing = final_masked.ends_with('|');
674
675        let mut masked_content = final_masked.as_str();
676        let mut orig_content = trimmed;
677
678        if has_leading {
679            masked_content = &masked_content[1..];
680            orig_content = &orig_content[1..];
681        }
682
683        // Track whether we actually strip a trailing pipe
684        let stripped_trailing = has_trailing && !masked_content.is_empty();
685        if stripped_trailing {
686            masked_content = &masked_content[..masked_content.len() - 1];
687            orig_content = &orig_content[..orig_content.len() - 1];
688        }
689
690        // Handle edge cases for degenerate inputs
691        if masked_content.is_empty() {
692            if stripped_trailing {
693                // "||" case: two pipes with empty content between = one empty cell
694                return vec![String::new()];
695            } else {
696                // "|" case: single pipe, not a valid table row
697                return Vec::new();
698            }
699        }
700
701        let masked_parts: Vec<&str> = masked_content.split('|').collect();
702        let mut cells = Vec::new();
703        let mut pos = 0;
704
705        for masked_cell in masked_parts {
706            let cell_len = masked_cell.len();
707            let orig_cell = if pos + cell_len <= orig_content.len() {
708                &orig_content[pos..pos + cell_len]
709            } else {
710                masked_cell
711            };
712            cells.push(orig_cell.to_string());
713            pos += cell_len + 1; // +1 for the pipe delimiter
714        }
715
716        cells
717    }
718
    /// Split a table row into individual cell contents using Standard/GFM behavior.
    ///
    /// Convenience wrapper around `split_table_row_with_flavor` using
    /// `MarkdownFlavor::Standard`; cells are returned untrimmed.
    pub fn split_table_row(row: &str) -> Vec<String> {
        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
    }
723
724    /// Determine the pipe style of a table row
725    ///
726    /// Handles tables inside blockquotes by stripping the blockquote prefix
727    /// before analyzing the pipe style.
728    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
729        // Strip blockquote prefix if present before analyzing pipe style
730        let content = Self::strip_blockquote_prefix(line);
731        let trimmed = content.trim();
732        if !trimmed.contains('|') {
733            return None;
734        }
735
736        let has_leading = trimmed.starts_with('|');
737        let has_trailing = trimmed.ends_with('|');
738
739        match (has_leading, has_trailing) {
740            (true, true) => Some("leading_and_trailing"),
741            (true, false) => Some("leading_only"),
742            (false, true) => Some("trailing_only"),
743            (false, false) => Some("no_leading_or_trailing"),
744        }
745    }
746
747    /// Extract blockquote prefix from a line, returning (prefix, content).
748    ///
749    /// This is useful for stripping the prefix before processing, then restoring it after.
750    /// For example: `"> | H1 | H2 |"` returns `("> ", "| H1 | H2 |")`.
751    pub fn extract_blockquote_prefix(line: &str) -> (&str, &str) {
752        // Find where the actual content starts (after blockquote markers and spaces)
753        let bytes = line.as_bytes();
754        let mut pos = 0;
755
756        // Skip leading whitespace (indent before blockquote marker)
757        while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
758            pos += 1;
759        }
760
761        // If no blockquote marker, return empty prefix
762        if pos >= bytes.len() || bytes[pos] != b'>' {
763            return ("", line);
764        }
765
766        // Skip all blockquote markers and spaces
767        while pos < bytes.len() {
768            if bytes[pos] == b'>' {
769                pos += 1;
770                // Skip optional space after >
771                if pos < bytes.len() && bytes[pos] == b' ' {
772                    pos += 1;
773                }
774            } else if bytes[pos] == b' ' || bytes[pos] == b'\t' {
775                pos += 1;
776            } else {
777                break;
778            }
779        }
780
781        // Split at the position where content starts
782        (&line[..pos], &line[pos..])
783    }
784
785    /// Extract list marker prefix from a line, returning (prefix, content, content_indent).
786    ///
787    /// This handles unordered list markers (`-`, `*`, `+`) and ordered list markers (`1.`, `10)`, etc.)
788    /// Returns:
789    /// - prefix: The list marker including any leading whitespace and trailing space (e.g., "- ", "  1. ")
790    /// - content: The content after the list marker
791    /// - content_indent: The number of spaces needed for continuation lines to align with content
792    ///
793    /// For example:
794    /// - `"- | H1 | H2 |"` returns `("- ", "| H1 | H2 |", 2)`
795    /// - `"1. | H1 | H2 |"` returns `("1. ", "| H1 | H2 |", 3)`
796    /// - `"  - table"` returns `("  - ", "table", 4)`
797    ///
798    /// Returns `("", line, 0)` if the line doesn't start with a list marker.
799    pub fn extract_list_prefix(line: &str) -> (&str, &str, usize) {
800        let bytes = line.as_bytes();
801
802        // Skip leading whitespace
803        let leading_spaces = bytes.iter().take_while(|&&b| b == b' ' || b == b'\t').count();
804        let mut pos = leading_spaces;
805
806        if pos >= bytes.len() {
807            return ("", line, 0);
808        }
809
810        // Check for unordered list marker: -, *, +
811        if matches!(bytes[pos], b'-' | b'*' | b'+') {
812            pos += 1;
813
814            // Must be followed by space or tab (or end of line for marker-only lines)
815            if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
816                // Skip the space after marker if present
817                if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
818                    pos += 1;
819                }
820                let content_indent = pos;
821                return (&line[..pos], &line[pos..], content_indent);
822            }
823            // Not a list marker (e.g., "-word" or "--")
824            return ("", line, 0);
825        }
826
827        // Check for ordered list marker: digits followed by . or ) then space
828        if bytes[pos].is_ascii_digit() {
829            let digit_start = pos;
830            while pos < bytes.len() && bytes[pos].is_ascii_digit() {
831                pos += 1;
832            }
833
834            // Must have at least one digit
835            if pos > digit_start && pos < bytes.len() {
836                // Check for . or ) followed by space/tab
837                if bytes[pos] == b'.' || bytes[pos] == b')' {
838                    pos += 1;
839                    if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
840                        // Skip the space after marker if present
841                        if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
842                            pos += 1;
843                        }
844                        let content_indent = pos;
845                        return (&line[..pos], &line[pos..], content_indent);
846                    }
847                }
848            }
849        }
850
851        ("", line, 0)
852    }
853
854    /// Extract the table row content from a line, stripping any list/blockquote prefix.
855    ///
856    /// This is useful for processing table rows that may be inside list items or blockquotes.
857    /// The line_index indicates which line of the table this is (0 = header, 1 = delimiter, etc.)
858    pub fn extract_table_row_content<'a>(line: &'a str, table_block: &TableBlock, line_index: usize) -> &'a str {
859        // First strip blockquote prefix
860        let (_, after_blockquote) = Self::extract_blockquote_prefix(line);
861
862        // Then handle list prefix if present
863        if let Some(ref list_ctx) = table_block.list_context {
864            if line_index == 0 {
865                // Header line: strip list prefix (handles both markers and indentation)
866                after_blockquote
867                    .strip_prefix(&list_ctx.list_prefix)
868                    .unwrap_or_else(|| Self::extract_list_prefix(after_blockquote).1)
869            } else {
870                // Continuation lines: strip indentation
871                Self::strip_list_continuation_indent(after_blockquote, list_ctx.content_indent)
872            }
873        } else {
874            after_blockquote
875        }
876    }
877
878    /// Check if the content after a list marker looks like a table row.
879    /// This is used to detect tables that start on the same line as a list marker.
880    pub fn is_list_item_with_table_row(line: &str) -> bool {
881        let (prefix, content, _) = Self::extract_list_prefix(line);
882        if prefix.is_empty() {
883            return false;
884        }
885
886        // Check if the content after the list marker is a table row
887        // It must start with | (proper table format within a list)
888        let trimmed = content.trim();
889        if !trimmed.starts_with('|') {
890            return false;
891        }
892
893        // Use our table row detection on the content
894        Self::is_potential_table_row_content(content)
895    }
896
    /// Internal helper: check whether `content` looks like a table row.
    ///
    /// `content` is expected to be a line whose list/blockquote prefix has already been
    /// removed by the caller. The check itself is delegated unchanged to
    /// `is_potential_table_row`.
    fn is_potential_table_row_content(content: &str) -> bool {
        Self::is_potential_table_row(content)
    }
901}
902
903#[cfg(test)]
904mod tests {
905    use super::*;
906    use crate::lint_context::LintContext;
907
908    #[test]
909    fn test_is_potential_table_row() {
910        // Basic valid table rows
911        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
912        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
913        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
914        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
915
916        // Multiple cells
917        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
918
919        // With whitespace
920        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
921        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
922
923        // Not table rows
924        assert!(!TableUtils::is_potential_table_row("- List item"));
925        assert!(!TableUtils::is_potential_table_row("* Another list"));
926        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
927        assert!(!TableUtils::is_potential_table_row("Regular text"));
928        assert!(!TableUtils::is_potential_table_row(""));
929        assert!(!TableUtils::is_potential_table_row("   "));
930
931        // Code blocks
932        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
933        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
934        assert!(!TableUtils::is_potential_table_row("Use ``a|b`` in prose"));
935        assert!(TableUtils::is_potential_table_row("| `fenced` | Uses ``` and ~~~ |"));
936        assert!(TableUtils::is_potential_table_row("`!foo && bar` | `(!foo) && bar`"));
937        assert!(!TableUtils::is_potential_table_row("`echo a | sed 's/a/b/'`"));
938
939        // Single pipe not enough
940        assert!(!TableUtils::is_potential_table_row("Just one |"));
941        assert!(!TableUtils::is_potential_table_row("| Just one"));
942
943        // Very long cells are valid in tables (no length limit for cell content)
944        let long_cell = "a".repeat(150);
945        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
946
947        // Cells with newlines
948        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
949
950        // Empty cells (Issue #129)
951        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
952        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
953        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
954    }
955
956    #[test]
957    fn test_list_items_with_pipes_not_table_rows() {
958        // Ordered list items should NOT be detected as table rows
959        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
960        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
961        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
962        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
963        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
964
965        // Unordered list items with tabs
966        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
967        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
968        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
969
970        // Indented list items (the trim_start normalizes indentation)
971        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
972        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
973        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
974
975        // Task list items
976        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
977        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
978
979        // Multiple pipes in list items
980        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
981        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
982
983        // These SHOULD still be detected as potential table rows
984        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
985        assert!(TableUtils::is_potential_table_row("cell | cell"));
986        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
987    }
988
989    #[test]
990    fn test_atx_headings_with_pipes_not_table_rows() {
991        // All 6 ATX heading levels with pipes
992        assert!(!TableUtils::is_potential_table_row("# Heading | with pipe"));
993        assert!(!TableUtils::is_potential_table_row("## Heading | with pipe"));
994        assert!(!TableUtils::is_potential_table_row("### Heading | with pipe"));
995        assert!(!TableUtils::is_potential_table_row("#### Heading | with pipe"));
996        assert!(!TableUtils::is_potential_table_row("##### Heading | with pipe"));
997        assert!(!TableUtils::is_potential_table_row("###### Heading | with pipe"));
998
999        // Multiple pipes in headings
1000        assert!(!TableUtils::is_potential_table_row("### col1 | col2 | col3"));
1001        assert!(!TableUtils::is_potential_table_row("## a|b|c"));
1002
1003        // Headings with tab after hashes
1004        assert!(!TableUtils::is_potential_table_row("#\tHeading | pipe"));
1005        assert!(!TableUtils::is_potential_table_row("##\tHeading | pipe"));
1006
1007        // Heading with only hashes and pipe (empty heading text)
1008        assert!(!TableUtils::is_potential_table_row("# |"));
1009        assert!(!TableUtils::is_potential_table_row("## |"));
1010
1011        // Indented headings (spaces before #)
1012        assert!(!TableUtils::is_potential_table_row("  ## Heading | pipe"));
1013        assert!(!TableUtils::is_potential_table_row("   ### Heading | pipe"));
1014
1015        // Unicode content in headings (the original proptest failure case)
1016        assert!(!TableUtils::is_potential_table_row("#### ®aAA|ᯗ"));
1017
1018        // 7+ hashes are NOT headings — should follow normal table detection
1019        // "####### text|pipe" has no space after 7 hashes if treated as non-heading
1020        // but with a space it still has 7+ hashes so not a heading
1021        assert!(TableUtils::is_potential_table_row("####### text | pipe"));
1022
1023        // Hash without space is NOT a heading, so pipe detection applies
1024        assert!(TableUtils::is_potential_table_row("#nospc|pipe"));
1025
1026        // These SHOULD still be detected as potential table rows
1027        assert!(TableUtils::is_potential_table_row("| # Header | Value |"));
1028        assert!(TableUtils::is_potential_table_row("text | #tag"));
1029    }
1030
1031    #[test]
1032    fn test_is_delimiter_row() {
1033        // Basic delimiter rows
1034        assert!(TableUtils::is_delimiter_row("|---|---|"));
1035        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
1036        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
1037        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
1038
1039        // With varying dash counts
1040        assert!(TableUtils::is_delimiter_row("|-|--|"));
1041        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
1042
1043        // With whitespace
1044        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
1045        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
1046
1047        // Multiple columns
1048        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
1049
1050        // Without leading/trailing pipes
1051        assert!(TableUtils::is_delimiter_row("--- | ---"));
1052        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
1053
1054        // Not delimiter rows
1055        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
1056        assert!(!TableUtils::is_delimiter_row("Regular text"));
1057        assert!(!TableUtils::is_delimiter_row(""));
1058        assert!(!TableUtils::is_delimiter_row("|||"));
1059        assert!(!TableUtils::is_delimiter_row("| | |"));
1060
1061        // Must have dashes
1062        assert!(!TableUtils::is_delimiter_row("| : | : |"));
1063        assert!(!TableUtils::is_delimiter_row("|    |    |"));
1064
1065        // Mixed content
1066        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
1067        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
1068    }
1069
1070    #[test]
1071    fn test_count_cells() {
1072        // Basic counts
1073        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
1074        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
1075        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
1076        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
1077
1078        // Single cell
1079        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
1080        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
1081
1082        // Empty cells
1083        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
1084        assert_eq!(TableUtils::count_cells("| | | |"), 3);
1085
1086        // Many cells
1087        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
1088
1089        // Edge cases
1090        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
1091        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
1092
1093        // No table
1094        assert_eq!(TableUtils::count_cells("Regular text"), 0);
1095        assert_eq!(TableUtils::count_cells(""), 0);
1096        assert_eq!(TableUtils::count_cells("   "), 0);
1097
1098        // Whitespace handling
1099        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
1100        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
1101    }
1102
1103    #[test]
1104    fn test_count_cells_with_escaped_pipes() {
1105        // Pipes inside code spans are treated as content, not cell delimiters.
1106        // To include a literal pipe outside code spans, escape it with \|.
1107
1108        // Basic table structure
1109        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
1110        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
1111        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
1112
1113        // Escaped pipes: \| keeps the pipe as content
1114        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
1115        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
1116
1117        // Escaped pipes inside backticks
1118        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
1119
1120        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
1121        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
1122        // Double backslash inside backticks: pipe is still masked by code span
1123        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 2);
1124
1125        // Pipes inside code spans are content, not delimiters
1126        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 2);
1127        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 2);
1128        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 1);
1129
1130        // Regex example - pipes in code spans are masked
1131        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 2);
1132        // Escaped pipe inside code is also masked (escape is redundant here)
1133        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
1134    }
1135
1136    #[test]
1137    fn test_determine_pipe_style() {
1138        // All pipe styles
1139        assert_eq!(
1140            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
1141            Some("leading_and_trailing")
1142        );
1143        assert_eq!(
1144            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
1145            Some("leading_only")
1146        );
1147        assert_eq!(
1148            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
1149            Some("trailing_only")
1150        );
1151        assert_eq!(
1152            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
1153            Some("no_leading_or_trailing")
1154        );
1155
1156        // With whitespace
1157        assert_eq!(
1158            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
1159            Some("leading_and_trailing")
1160        );
1161        assert_eq!(
1162            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
1163            Some("leading_only")
1164        );
1165
1166        // No pipes
1167        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
1168        assert_eq!(TableUtils::determine_pipe_style(""), None);
1169        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1170
1171        // Single pipe cases
1172        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
1173        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
1174        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
1175    }
1176
1177    #[test]
1178    fn test_find_table_blocks_simple() {
1179        let content = "| Header 1 | Header 2 |
1180|-----------|-----------|
1181| Cell 1    | Cell 2    |
1182| Cell 3    | Cell 4    |";
1183
1184        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1185
1186        let tables = TableUtils::find_table_blocks(content, &ctx);
1187        assert_eq!(tables.len(), 1);
1188
1189        let table = &tables[0];
1190        assert_eq!(table.start_line, 0);
1191        assert_eq!(table.end_line, 3);
1192        assert_eq!(table.header_line, 0);
1193        assert_eq!(table.delimiter_line, 1);
1194        assert_eq!(table.content_lines, vec![2, 3]);
1195    }
1196
1197    #[test]
1198    fn test_find_table_blocks_multiple() {
1199        let content = "Some text
1200
1201| Table 1 | Col A |
1202|----------|-------|
1203| Data 1   | Val 1 |
1204
1205More text
1206
1207| Table 2 | Col 2 |
1208|----------|-------|
1209| Data 2   | Data  |";
1210
1211        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1212
1213        let tables = TableUtils::find_table_blocks(content, &ctx);
1214        assert_eq!(tables.len(), 2);
1215
1216        // First table
1217        assert_eq!(tables[0].start_line, 2);
1218        assert_eq!(tables[0].end_line, 4);
1219        assert_eq!(tables[0].header_line, 2);
1220        assert_eq!(tables[0].delimiter_line, 3);
1221        assert_eq!(tables[0].content_lines, vec![4]);
1222
1223        // Second table
1224        assert_eq!(tables[1].start_line, 8);
1225        assert_eq!(tables[1].end_line, 10);
1226        assert_eq!(tables[1].header_line, 8);
1227        assert_eq!(tables[1].delimiter_line, 9);
1228        assert_eq!(tables[1].content_lines, vec![10]);
1229    }
1230
1231    #[test]
1232    fn test_find_table_blocks_no_content_rows() {
1233        let content = "| Header 1 | Header 2 |
1234|-----------|-----------|
1235
1236Next paragraph";
1237
1238        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1239
1240        let tables = TableUtils::find_table_blocks(content, &ctx);
1241        assert_eq!(tables.len(), 1);
1242
1243        let table = &tables[0];
1244        assert_eq!(table.start_line, 0);
1245        assert_eq!(table.end_line, 1); // Just header and delimiter
1246        assert_eq!(table.content_lines.len(), 0);
1247    }
1248
1249    #[test]
1250    fn test_find_table_blocks_in_code_block() {
1251        let content = "```
1252| Not | A | Table |
1253|-----|---|-------|
1254| In  | Code | Block |
1255```
1256
1257| Real | Table |
1258|------|-------|
1259| Data | Here  |";
1260
1261        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1262
1263        let tables = TableUtils::find_table_blocks(content, &ctx);
1264        assert_eq!(tables.len(), 1); // Only the table outside code block
1265
1266        let table = &tables[0];
1267        assert_eq!(table.header_line, 6);
1268        assert_eq!(table.delimiter_line, 7);
1269    }
1270
1271    #[test]
1272    fn test_find_table_blocks_no_tables() {
1273        let content = "Just regular text
1274No tables here
1275- List item with | pipe
1276* Another list item";
1277
1278        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1279
1280        let tables = TableUtils::find_table_blocks(content, &ctx);
1281        assert_eq!(tables.len(), 0);
1282    }
1283
1284    #[test]
1285    fn test_find_table_blocks_malformed() {
1286        let content = "| Header without delimiter |
1287| This looks like table |
1288But no delimiter row
1289
1290| Proper | Table |
1291|---------|-------|
1292| Data    | Here  |";
1293
1294        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1295
1296        let tables = TableUtils::find_table_blocks(content, &ctx);
1297        assert_eq!(tables.len(), 1); // Only the proper table
1298        assert_eq!(tables[0].header_line, 4);
1299    }
1300
1301    #[test]
1302    fn test_edge_cases() {
1303        // Test empty content
1304        assert!(!TableUtils::is_potential_table_row(""));
1305        assert!(!TableUtils::is_delimiter_row(""));
1306        assert_eq!(TableUtils::count_cells(""), 0);
1307        assert_eq!(TableUtils::determine_pipe_style(""), None);
1308
1309        // Test whitespace only
1310        assert!(!TableUtils::is_potential_table_row("   "));
1311        assert!(!TableUtils::is_delimiter_row("   "));
1312        assert_eq!(TableUtils::count_cells("   "), 0);
1313        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1314
1315        // Test single character
1316        assert!(!TableUtils::is_potential_table_row("|"));
1317        assert!(!TableUtils::is_delimiter_row("|"));
1318        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
1319
1320        // Test very long lines are valid table rows (no length limit)
1321        // Test both single-column and multi-column long lines
1322        let long_single = format!("| {} |", "a".repeat(200));
1323        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
1324
1325        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
1326        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
1327
1328        // Test unicode
1329        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
1330        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
1331        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
1332    }
1333
1334    #[test]
1335    fn test_table_block_struct() {
1336        let block = TableBlock {
1337            start_line: 0,
1338            end_line: 5,
1339            header_line: 0,
1340            delimiter_line: 1,
1341            content_lines: vec![2, 3, 4, 5],
1342            list_context: None,
1343        };
1344
1345        // Test Debug trait
1346        let debug_str = format!("{block:?}");
1347        assert!(debug_str.contains("TableBlock"));
1348        assert!(debug_str.contains("start_line: 0"));
1349
1350        // Test Clone trait
1351        let cloned = block.clone();
1352        assert_eq!(cloned.start_line, block.start_line);
1353        assert_eq!(cloned.end_line, block.end_line);
1354        assert_eq!(cloned.header_line, block.header_line);
1355        assert_eq!(cloned.delimiter_line, block.delimiter_line);
1356        assert_eq!(cloned.content_lines, block.content_lines);
1357        assert!(cloned.list_context.is_none());
1358    }
1359
1360    #[test]
1361    fn test_split_table_row() {
1362        // Basic split
1363        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
1364        assert_eq!(cells.len(), 3);
1365        assert_eq!(cells[0].trim(), "Cell 1");
1366        assert_eq!(cells[1].trim(), "Cell 2");
1367        assert_eq!(cells[2].trim(), "Cell 3");
1368
1369        // Without trailing pipe
1370        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
1371        assert_eq!(cells.len(), 2);
1372
1373        // Empty cells
1374        let cells = TableUtils::split_table_row("| | | |");
1375        assert_eq!(cells.len(), 3);
1376
1377        // Single cell
1378        let cells = TableUtils::split_table_row("| Cell |");
1379        assert_eq!(cells.len(), 1);
1380        assert_eq!(cells[0].trim(), "Cell");
1381
1382        // No pipes
1383        let cells = TableUtils::split_table_row("No pipes here");
1384        assert_eq!(cells.len(), 0);
1385    }
1386
1387    #[test]
1388    fn test_split_table_row_with_escaped_pipes() {
1389        // Escaped pipes should be preserved in cell content
1390        let cells = TableUtils::split_table_row(r"| A | B \| C |");
1391        assert_eq!(cells.len(), 2);
1392        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
1393
1394        // Double backslash + pipe is NOT escaped
1395        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
1396        assert_eq!(cells.len(), 3);
1397    }
1398
1399    #[test]
1400    fn test_split_table_row_with_flavor_mkdocs() {
1401        // MkDocs flavor: pipes in inline code are NOT cell delimiters
1402        let cells =
1403            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
1404        assert_eq!(cells.len(), 2);
1405        assert!(
1406            cells[1].contains("`x | y`"),
1407            "Inline code with pipe should be single cell in MkDocs flavor"
1408        );
1409
1410        // Multiple pipes in inline code
1411        let cells =
1412            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
1413        assert_eq!(cells.len(), 2);
1414        assert!(cells[1].contains("`a | b | c`"));
1415    }
1416
1417    #[test]
1418    fn test_split_table_row_with_flavor_standard() {
1419        // Pipes in inline code are NOT cell delimiters for any flavor
1420        let cells =
1421            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
1422        assert_eq!(
1423            cells.len(),
1424            2,
1425            "Pipes in code spans should not be cell delimiters, got {cells:?}"
1426        );
1427        assert!(
1428            cells[1].contains("`x | y`"),
1429            "Inline code with pipe should be single cell"
1430        );
1431    }
1432
1433    // === extract_blockquote_prefix tests ===
1434
1435    #[test]
1436    fn test_extract_blockquote_prefix_no_blockquote() {
1437        // Regular table row without blockquote
1438        let (prefix, content) = TableUtils::extract_blockquote_prefix("| H1 | H2 |");
1439        assert_eq!(prefix, "");
1440        assert_eq!(content, "| H1 | H2 |");
1441    }
1442
1443    #[test]
1444    fn test_extract_blockquote_prefix_single_level() {
1445        // Single blockquote level
1446        let (prefix, content) = TableUtils::extract_blockquote_prefix("> | H1 | H2 |");
1447        assert_eq!(prefix, "> ");
1448        assert_eq!(content, "| H1 | H2 |");
1449    }
1450
1451    #[test]
1452    fn test_extract_blockquote_prefix_double_level() {
1453        // Double blockquote level
1454        let (prefix, content) = TableUtils::extract_blockquote_prefix(">> | H1 | H2 |");
1455        assert_eq!(prefix, ">> ");
1456        assert_eq!(content, "| H1 | H2 |");
1457    }
1458
1459    #[test]
1460    fn test_extract_blockquote_prefix_triple_level() {
1461        // Triple blockquote level
1462        let (prefix, content) = TableUtils::extract_blockquote_prefix(">>> | H1 | H2 |");
1463        assert_eq!(prefix, ">>> ");
1464        assert_eq!(content, "| H1 | H2 |");
1465    }
1466
1467    #[test]
1468    fn test_extract_blockquote_prefix_with_spaces() {
1469        // Blockquote with spaces between markers
1470        let (prefix, content) = TableUtils::extract_blockquote_prefix("> > | H1 | H2 |");
1471        assert_eq!(prefix, "> > ");
1472        assert_eq!(content, "| H1 | H2 |");
1473    }
1474
1475    #[test]
1476    fn test_extract_blockquote_prefix_indented() {
1477        // Indented blockquote
1478        let (prefix, content) = TableUtils::extract_blockquote_prefix("  > | H1 | H2 |");
1479        assert_eq!(prefix, "  > ");
1480        assert_eq!(content, "| H1 | H2 |");
1481    }
1482
1483    #[test]
1484    fn test_extract_blockquote_prefix_no_space_after() {
1485        // Blockquote without space after marker
1486        let (prefix, content) = TableUtils::extract_blockquote_prefix(">| H1 | H2 |");
1487        assert_eq!(prefix, ">");
1488        assert_eq!(content, "| H1 | H2 |");
1489    }
1490
1491    #[test]
1492    fn test_determine_pipe_style_in_blockquote() {
1493        // determine_pipe_style should handle blockquotes correctly
1494        assert_eq!(
1495            TableUtils::determine_pipe_style("> | H1 | H2 |"),
1496            Some("leading_and_trailing")
1497        );
1498        assert_eq!(
1499            TableUtils::determine_pipe_style("> H1 | H2"),
1500            Some("no_leading_or_trailing")
1501        );
1502        assert_eq!(
1503            TableUtils::determine_pipe_style(">> | H1 | H2 |"),
1504            Some("leading_and_trailing")
1505        );
1506        assert_eq!(TableUtils::determine_pipe_style(">>> | H1 | H2"), Some("leading_only"));
1507    }
1508
1509    #[test]
1510    fn test_list_table_delimiter_requires_indentation() {
1511        // Test case: list item contains pipe, but delimiter line is at column 1
1512        // This should NOT be detected as a list table since the delimiter has no indentation.
1513        // The result is a non-list table starting at line 0 (the list item becomes the header)
1514        // but list_context should be None.
1515        let content = "- List item with | pipe\n|---|---|\n| Cell 1 | Cell 2 |";
1516        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1517        let tables = TableUtils::find_table_blocks(content, &ctx);
1518
1519        // The table will be detected starting at line 0, but crucially it should NOT have
1520        // list_context set, meaning it won't be treated as a list-table for column count purposes
1521        assert_eq!(tables.len(), 1, "Should find exactly one table");
1522        assert!(
1523            tables[0].list_context.is_none(),
1524            "Should NOT have list context since delimiter has no indentation"
1525        );
1526    }
1527
1528    #[test]
1529    fn test_list_table_with_properly_indented_delimiter() {
1530        // Test case: list item with table header, delimiter properly indented
1531        // This SHOULD be detected as a list table
1532        let content = "- | Header 1 | Header 2 |\n  |----------|----------|\n  | Cell 1   | Cell 2   |";
1533        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1534        let tables = TableUtils::find_table_blocks(content, &ctx);
1535
1536        // Should find exactly one list-table starting at line 0
1537        assert_eq!(tables.len(), 1, "Should find exactly one table");
1538        assert_eq!(tables[0].start_line, 0, "Table should start at list item line");
1539        assert!(
1540            tables[0].list_context.is_some(),
1541            "Should be a list table since delimiter is properly indented"
1542        );
1543    }
1544
1545    #[test]
1546    fn test_mask_pipes_in_inline_code_regular_backticks() {
1547        // Regular backtick code span: pipe should be masked
1548        let result = TableUtils::mask_pipes_in_inline_code("| `code | here` |");
1549        assert_eq!(result, "| `code _ here` |");
1550    }
1551
1552    #[test]
1553    fn test_mask_pipes_in_inline_code_escaped_backtick_not_code_span() {
1554        // Escaped backtick (\`) is literal text, not a code span opener.
1555        // The pipe should NOT be masked.
1556        let result = TableUtils::mask_pipes_in_inline_code(r"| \`not code | still pipe\` |");
1557        assert_eq!(result, r"| \`not code | still pipe\` |");
1558    }
1559
1560    #[test]
1561    fn test_mask_pipes_in_inline_code_escaped_backslash_then_backtick() {
1562        // Escaped backslash (\\) followed by backtick: the backtick IS a code span opener.
1563        // The pipe inside the code span SHOULD be masked.
1564        let result = TableUtils::mask_pipes_in_inline_code(r"| \\`real code | masked\\` |");
1565        // \\` = escaped backslash + real backtick (code span opener)
1566        // The pipe between the backticks should be masked
1567        assert_eq!(result, r"| \\`real code _ masked\\` |");
1568    }
1569
1570    #[test]
1571    fn test_mask_pipes_in_inline_code_triple_backslash_before_backtick() {
1572        // Three backslashes before backtick: odd count means backtick is escaped
1573        let result = TableUtils::mask_pipes_in_inline_code(r"| \\\`not code | pipe\\\` |");
1574        assert_eq!(result, r"| \\\`not code | pipe\\\` |");
1575    }
1576
1577    #[test]
1578    fn test_mask_pipes_in_inline_code_four_backslashes_before_backtick() {
1579        // Four backslashes before backtick: even count means backtick is a real delimiter
1580        let result = TableUtils::mask_pipes_in_inline_code(r"| \\\\`code | here\\\\` |");
1581        assert_eq!(result, r"| \\\\`code _ here\\\\` |");
1582    }
1583
1584    #[test]
1585    fn test_mask_pipes_in_inline_code_no_backslash() {
1586        // No backslashes at all: standard behavior, pipe inside code span is masked
1587        let result = TableUtils::mask_pipes_in_inline_code("before `a | b` after");
1588        assert_eq!(result, "before `a _ b` after");
1589    }
1590
1591    #[test]
1592    fn test_mask_pipes_in_inline_code_no_code_span() {
1593        // No backticks at all: nothing should be masked
1594        let result = TableUtils::mask_pipes_in_inline_code("| col1 | col2 |");
1595        assert_eq!(result, "| col1 | col2 |");
1596    }
1597
1598    #[test]
1599    fn test_mask_pipes_in_inline_code_backslash_before_closing_backtick() {
1600        // Per CommonMark spec, backslash escapes do NOT work inside code spans.
1601        // Inside a code span, `\` is a literal character. So `foo\` is a valid
1602        // code span containing "foo\", and the closing backtick is NOT escaped.
1603        //
1604        // Input: | `foo\` | bar |
1605        // The code span is `foo\` (backtick opens, backslash is literal, backtick closes).
1606        // The pipe after the code span is a real delimiter, producing 2 cells.
1607        // The pipe inside the code span should be left alone (there isn't one here).
1608        let result = TableUtils::mask_pipes_in_inline_code(r"| `foo\` | bar |");
1609        // The backslash before closing backtick is literal inside the code span,
1610        // so the code span closes at that backtick. The pipe between cells is NOT masked.
1611        assert_eq!(result, r"| `foo\` | bar |");
1612    }
1613
1614    #[test]
1615    fn test_mask_pipes_in_inline_code_backslash_literal_with_pipe_inside() {
1616        // Code span contains a backslash and a pipe: `a\|b`
1617        // The backslash is literal inside the code span (CommonMark spec).
1618        // The pipe is inside the code span, so it should be masked.
1619        let result = TableUtils::mask_pipes_in_inline_code(r"| `a\|b` | col2 |");
1620        assert_eq!(result, r"| `a\_b` | col2 |");
1621    }
1622
1623    #[test]
1624    fn test_count_preceding_backslashes() {
1625        let chars: Vec<char> = r"abc\\\`def".chars().collect();
1626        // Position of backtick is at index 6 (a=0, b=1, c=2, \=3, \=4, \=5, `=6)
1627        assert_eq!(TableUtils::count_preceding_backslashes(&chars, 6), 3);
1628
1629        let chars2: Vec<char> = r"abc\\`def".chars().collect();
1630        // Position of backtick is at index 5
1631        assert_eq!(TableUtils::count_preceding_backslashes(&chars2, 5), 2);
1632
1633        let chars3: Vec<char> = "`def".chars().collect();
1634        // Position of backtick is at index 0 -- no preceding chars
1635        assert_eq!(TableUtils::count_preceding_backslashes(&chars3, 0), 0);
1636    }
1637
1638    #[test]
1639    fn test_has_unescaped_pipe_backslash_literal_in_code_span() {
1640        // Per CommonMark: backslashes are literal inside code spans.
1641        // `foo\` is a complete code span, so the pipe after it is outside code.
1642        assert!(TableUtils::has_unescaped_pipe_outside_inline_code(r"`foo\` | bar"));
1643
1644        // Escaped backtick outside code span: \` is not a code span opener
1645        assert!(TableUtils::has_unescaped_pipe_outside_inline_code(r"\`foo | bar\`"));
1646
1647        // Pipe inside code span should not count
1648        assert!(!TableUtils::has_unescaped_pipe_outside_inline_code(r"`foo | bar`"));
1649    }
1650}