Skip to main content

rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).
5/// Represents a table block in the document
6#[derive(Debug, Clone)]
7pub struct TableBlock {
8    pub start_line: usize,
9    pub end_line: usize,
10    pub header_line: usize,
11    pub delimiter_line: usize,
12    pub content_lines: Vec<usize>,
13    /// If the table is inside a list item, this contains:
14    /// - The list marker prefix for the header line (e.g., "- ", "1. ")
15    /// - The content indent (number of spaces for continuation lines)
16    pub list_context: Option<ListTableContext>,
17}
18
/// Describes how a table is embedded in a list item.
#[derive(Clone, Debug)]
pub struct ListTableContext {
    /// Prefix of the header line: the list marker together with any leading
    /// whitespace (e.g., "- ", "  1. ").
    pub list_prefix: String,
    /// How many spaces continuation lines need in order to line up with the
    /// list item's content.
    pub content_indent: usize,
}
27
/// Shared table detection utilities.
///
/// This is a field-less unit struct; it serves as a namespace for the
/// associated functions defined in its `impl` block.
pub struct TableUtils;
30
31impl TableUtils {
32    /// Returns true if the line has at least one unescaped pipe separator outside inline code spans.
33    ///
34    /// This helps distinguish actual table separators from command/prose examples like
35    /// `` `echo a | sed 's/a/b/'` `` where the pipe is fully inside inline code.
36    fn has_unescaped_pipe_outside_inline_code(text: &str) -> bool {
37        let chars: Vec<char> = text.chars().collect();
38        let mut i = 0;
39        let mut in_code = false;
40        let mut code_delim_len = 0usize;
41
42        while i < chars.len() {
43            let ch = chars[i];
44
45            if ch == '\\' && !in_code {
46                // Skip escaped character (only outside code spans —
47                // backslashes are literal inside code spans per CommonMark).
48                i += if i + 1 < chars.len() { 2 } else { 1 };
49                continue;
50            }
51
52            if ch == '`' {
53                let mut run = 1usize;
54                while i + run < chars.len() && chars[i + run] == '`' {
55                    run += 1;
56                }
57
58                if in_code {
59                    if run == code_delim_len {
60                        in_code = false;
61                        code_delim_len = 0;
62                    }
63                } else {
64                    in_code = true;
65                    code_delim_len = run;
66                }
67
68                i += run;
69                continue;
70            }
71
72            if ch == '|' && !in_code {
73                return true;
74            }
75
76            i += 1;
77        }
78
79        false
80    }
81
82    /// Check if a line looks like a potential table row
83    pub fn is_potential_table_row(line: &str) -> bool {
84        let trimmed = line.trim();
85        if trimmed.is_empty() || !trimmed.contains('|') {
86            return false;
87        }
88
89        // Skip lines that are clearly not table rows
90        // Unordered list items with space or tab after marker
91        if trimmed.starts_with("- ")
92            || trimmed.starts_with("* ")
93            || trimmed.starts_with("+ ")
94            || trimmed.starts_with("-\t")
95            || trimmed.starts_with("*\t")
96            || trimmed.starts_with("+\t")
97        {
98            return false;
99        }
100
101        // Skip ordered list items: digits followed by . or ) then space/tab
102        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
103            && first_non_digit > 0
104        {
105            let after_digits = &trimmed[first_non_digit..];
106            if after_digits.starts_with(". ")
107                || after_digits.starts_with(".\t")
108                || after_digits.starts_with(") ")
109                || after_digits.starts_with(")\t")
110            {
111                return false;
112            }
113        }
114
115        // Skip ATX headings (# through ######)
116        if trimmed.starts_with('#') {
117            let hash_count = trimmed.bytes().take_while(|&b| b == b'#').count();
118            if hash_count <= 6 {
119                let after_hashes = &trimmed[hash_count..];
120                if after_hashes.is_empty() || after_hashes.starts_with(' ') || after_hashes.starts_with('\t') {
121                    return false;
122                }
123            }
124        }
125
126        // For rows without explicit outer pipes, require a real separator outside
127        // inline code spans to avoid prose/command false positives.
128        let has_outer_pipes = trimmed.starts_with('|') && trimmed.ends_with('|');
129        if !has_outer_pipes && !Self::has_unescaped_pipe_outside_inline_code(trimmed) {
130            return false;
131        }
132
133        // Must have at least 2 parts when split by |
134        let parts: Vec<&str> = trimmed.split('|').collect();
135        if parts.len() < 2 {
136            return false;
137        }
138
139        // Check if it looks like a table row by having reasonable content between pipes
140        let mut valid_parts = 0;
141        let mut total_non_empty_parts = 0;
142
143        for part in &parts {
144            let part_trimmed = part.trim();
145            // Skip empty parts (from leading/trailing pipes)
146            if part_trimmed.is_empty() {
147                continue;
148            }
149            total_non_empty_parts += 1;
150
151            // Count parts that look like table cells (reasonable content, no newlines)
152            if !part_trimmed.contains('\n') {
153                valid_parts += 1;
154            }
155        }
156
157        // Check if all non-empty parts are valid (no newlines)
158        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
159            // Some cells contain newlines, not a valid table row
160            return false;
161        }
162
163        // GFM allows tables with all empty cells (e.g., |||)
164        // These are valid if they have proper table formatting (leading and trailing pipes)
165        if total_non_empty_parts == 0 {
166            // Empty cells are only valid with proper pipe formatting
167            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
168        }
169
170        // GFM allows single-column tables, so >= 1 valid part is enough
171        // when the line has proper table formatting (pipes)
172        if trimmed.starts_with('|') && trimmed.ends_with('|') {
173            // Properly formatted table row with pipes on both ends
174            valid_parts >= 1
175        } else {
176            // For rows without proper pipe formatting, require at least 2 cells
177            valid_parts >= 2
178        }
179    }
180
181    /// Check if a line is a table delimiter row (e.g., |---|---|)
182    pub fn is_delimiter_row(line: &str) -> bool {
183        let trimmed = line.trim();
184        if !trimmed.contains('|') || !trimmed.contains('-') {
185            return false;
186        }
187
188        // Split by pipes and check each part
189        let parts: Vec<&str> = trimmed.split('|').collect();
190        let mut valid_delimiter_parts = 0;
191        let mut total_non_empty_parts = 0;
192
193        for part in &parts {
194            let part_trimmed = part.trim();
195            if part_trimmed.is_empty() {
196                continue; // Skip empty parts from leading/trailing pipes
197            }
198
199            total_non_empty_parts += 1;
200
201            // Check if this part looks like a delimiter (contains dashes and optionally colons)
202            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
203                valid_delimiter_parts += 1;
204            }
205        }
206
207        // All non-empty parts must be valid delimiters, and there must be at least one
208        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
209    }
210
211    /// Strip blockquote prefix from a line, returning the content without the prefix
212    fn strip_blockquote_prefix(line: &str) -> &str {
213        let trimmed = line.trim_start();
214        if trimmed.starts_with('>') {
215            // Strip all blockquote markers and following space
216            let mut rest = trimmed;
217            while rest.starts_with('>') {
218                rest = rest.strip_prefix('>').unwrap_or(rest);
219                rest = rest.trim_start_matches(' ');
220            }
221            rest
222        } else {
223            line
224        }
225    }
226
227    /// Find all table blocks in the content with optimized detection
228    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
229    pub fn find_table_blocks_with_code_info(
230        content: &str,
231        code_blocks: &[(usize, usize)],
232        code_spans: &[crate::lint_context::CodeSpan],
233        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
234    ) -> Vec<TableBlock> {
235        let lines: Vec<&str> = content.lines().collect();
236        let mut tables = Vec::new();
237        let mut i = 0;
238
239        // Pre-compute line positions for efficient code block checking
240        let mut line_positions = Vec::with_capacity(lines.len());
241        let mut pos = 0;
242        for line in &lines {
243            line_positions.push(pos);
244            pos += line.len() + 1; // +1 for newline
245        }
246
247        // Stack of active list content indents for continuation table tracking.
248        // Supports nested lists: when a child list is seen, we push; when we
249        // dedent past a level, we pop back to the enclosing list.
250        let mut list_indent_stack: Vec<usize> = Vec::new();
251
252        while i < lines.len() {
253            // Skip lines in code blocks, code spans, or HTML comments
254            let line_start = line_positions[i];
255            let in_code =
256                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start) || {
257                    // Binary search on sorted code spans
258                    let idx = code_spans.partition_point(|span| span.byte_offset <= line_start);
259                    idx > 0 && line_start < code_spans[idx - 1].byte_end
260                };
261            let in_html_comment = {
262                // Binary search on sorted HTML comment ranges
263                let idx = html_comment_ranges.partition_point(|range| range.start <= line_start);
264                idx > 0 && line_start < html_comment_ranges[idx - 1].end
265            };
266
267            if in_code || in_html_comment {
268                i += 1;
269                continue;
270            }
271
272            // Strip blockquote prefix for table detection
273            let line_content = Self::strip_blockquote_prefix(lines[i]);
274
275            // Update active list tracking
276            let (list_prefix, list_content, content_indent) = Self::extract_list_prefix(line_content);
277            if !list_prefix.is_empty() {
278                // Line has a list marker. Pop any deeper/equal levels, then push this one.
279                while list_indent_stack.last().is_some_and(|&top| top >= content_indent) {
280                    list_indent_stack.pop();
281                }
282                list_indent_stack.push(content_indent);
283            } else if !line_content.trim().is_empty() {
284                // Non-blank line without a marker: pop any levels we've dedented past
285                let leading = line_content.len() - line_content.trim_start().len();
286                while list_indent_stack.last().is_some_and(|&top| leading < top) {
287                    list_indent_stack.pop();
288                }
289            }
290            // Blank lines keep the stack unchanged (blank lines don't end list items)
291
292            // Check if this is a list item that contains a table row on the same line,
293            // or a continuation table indented under an active list item
294            let (is_same_line_list_table, effective_content) =
295                if !list_prefix.is_empty() && Self::is_potential_table_row_content(list_content) {
296                    (true, list_content)
297                } else {
298                    (false, line_content)
299                };
300
301            // Detect continuation list tables: no marker on this line, but indented
302            // under an active list item (e.g., "- Text\n  | h1 | h2 |")
303            let continuation_indent = if !is_same_line_list_table && list_prefix.is_empty() {
304                let leading = line_content.len() - line_content.trim_start().len();
305                // Find the deepest list level this line is indented under
306                list_indent_stack
307                    .iter()
308                    .rev()
309                    .find(|&&indent| leading >= indent)
310                    .copied()
311            } else {
312                None
313            };
314
315            let is_continuation_list_table = continuation_indent.is_some()
316                && {
317                    let indent = continuation_indent.unwrap();
318                    let leading = line_content.len() - line_content.trim_start().len();
319                    // Per CommonMark, 4+ spaces beyond content indent is a code block
320                    leading < indent + 4
321                }
322                && Self::is_potential_table_row(effective_content);
323
324            let is_any_list_table = is_same_line_list_table || is_continuation_list_table;
325
326            // For continuation list tables, use the matched list indent
327            let effective_content_indent = if is_same_line_list_table {
328                content_indent
329            } else if is_continuation_list_table {
330                continuation_indent.unwrap()
331            } else {
332                0
333            };
334
335            // Look for potential table start
336            if is_any_list_table || Self::is_potential_table_row(effective_content) {
337                // For list tables (same-line or continuation), check indented continuation lines
338                // For regular tables, check the next line directly
339                let (next_line_content, delimiter_has_valid_indent) = if i + 1 < lines.len() {
340                    let next_raw = Self::strip_blockquote_prefix(lines[i + 1]);
341                    if is_any_list_table {
342                        // Verify the delimiter line has proper indentation
343                        let leading_spaces = next_raw.len() - next_raw.trim_start().len();
344                        if leading_spaces >= effective_content_indent {
345                            // Has proper indentation, strip it and check as delimiter
346                            (
347                                Self::strip_list_continuation_indent(next_raw, effective_content_indent),
348                                true,
349                            )
350                        } else {
351                            // Not enough indentation - not a list table
352                            (next_raw, false)
353                        }
354                    } else {
355                        (next_raw, true)
356                    }
357                } else {
358                    ("", true)
359                };
360
361                // For list tables, only accept if delimiter has valid indentation
362                let effective_is_list_table = is_any_list_table && delimiter_has_valid_indent;
363
364                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
365                    // Found a table! Find its end
366                    let table_start = i;
367                    let header_line = i;
368                    let delimiter_line = i + 1;
369                    let mut table_end = i + 1; // Include the delimiter row
370                    let mut content_lines = Vec::new();
371
372                    // Continue while we have table rows
373                    let mut j = i + 2;
374                    while j < lines.len() {
375                        let line = lines[j];
376                        // Strip blockquote prefix for checking
377                        let raw_content = Self::strip_blockquote_prefix(line);
378
379                        // For list tables, strip expected indentation
380                        let line_content = if effective_is_list_table {
381                            Self::strip_list_continuation_indent(raw_content, effective_content_indent)
382                        } else {
383                            raw_content
384                        };
385
386                        if line_content.trim().is_empty() {
387                            // Empty line ends the table
388                            break;
389                        }
390
391                        // For list tables, the continuation line must have proper indentation
392                        if effective_is_list_table {
393                            let leading_spaces = raw_content.len() - raw_content.trim_start().len();
394                            if leading_spaces < effective_content_indent {
395                                // Not enough indentation - end of table
396                                break;
397                            }
398                        }
399
400                        if Self::is_potential_table_row(line_content) {
401                            content_lines.push(j);
402                            table_end = j;
403                            j += 1;
404                        } else {
405                            // Non-table line ends the table
406                            break;
407                        }
408                    }
409
410                    let list_context = if effective_is_list_table {
411                        if is_same_line_list_table {
412                            // Same-line: prefix is the actual list marker (e.g., "- ")
413                            Some(ListTableContext {
414                                list_prefix: list_prefix.to_string(),
415                                content_indent: effective_content_indent,
416                            })
417                        } else {
418                            // Continuation: prefix is the indentation spaces
419                            Some(ListTableContext {
420                                list_prefix: " ".repeat(effective_content_indent),
421                                content_indent: effective_content_indent,
422                            })
423                        }
424                    } else {
425                        None
426                    };
427
428                    tables.push(TableBlock {
429                        start_line: table_start,
430                        end_line: table_end,
431                        header_line,
432                        delimiter_line,
433                        content_lines,
434                        list_context,
435                    });
436                    i = table_end + 1;
437                } else {
438                    i += 1;
439                }
440            } else {
441                i += 1;
442            }
443        }
444
445        tables
446    }
447
448    /// Strip list continuation indentation from a line.
449    /// For lines that are continuations of a list item's content, strip the expected indent.
450    fn strip_list_continuation_indent(line: &str, expected_indent: usize) -> &str {
451        let bytes = line.as_bytes();
452        let mut spaces = 0;
453
454        for &b in bytes {
455            if b == b' ' {
456                spaces += 1;
457            } else if b == b'\t' {
458                // Tab counts as up to 4 spaces, rounding up to next multiple of 4
459                spaces = (spaces / 4 + 1) * 4;
460            } else {
461                break;
462            }
463
464            if spaces >= expected_indent {
465                break;
466            }
467        }
468
469        // Strip at most expected_indent characters
470        let strip_count = spaces.min(expected_indent).min(line.len());
471        // Count actual bytes to strip (handling tabs)
472        let mut byte_count = 0;
473        let mut counted_spaces = 0;
474        for &b in bytes {
475            if counted_spaces >= strip_count {
476                break;
477            }
478            if b == b' ' {
479                counted_spaces += 1;
480                byte_count += 1;
481            } else if b == b'\t' {
482                counted_spaces = (counted_spaces / 4 + 1) * 4;
483                byte_count += 1;
484            } else {
485                break;
486            }
487        }
488
489        &line[byte_count..]
490    }
491
492    /// Find all table blocks in the content with optimized detection
493    /// This is a backward-compatible wrapper that accepts LintContext
494    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
495        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
496    }
497
498    /// Count the number of cells in a table row
499    pub fn count_cells(row: &str) -> usize {
500        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
501    }
502
503    /// Count the number of cells in a table row with flavor-specific behavior
504    ///
505    /// Pipes inside code spans are treated as content, not cell delimiters.
506    ///
507    /// This function strips blockquote prefixes before counting cells, so it works
508    /// correctly for tables inside blockquotes.
509    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
510        // Strip blockquote prefix if present before counting cells
511        let (_, content) = Self::extract_blockquote_prefix(row);
512        Self::split_table_row_with_flavor(content, flavor).len()
513    }
514
515    /// Count the number of consecutive backslashes immediately preceding `pos` in `chars`.
516    fn count_preceding_backslashes(chars: &[char], pos: usize) -> usize {
517        let mut count = 0;
518        let mut k = pos;
519        while k > 0 {
520            k -= 1;
521            if chars[k] == '\\' {
522                count += 1;
523            } else {
524                break;
525            }
526        }
527        count
528    }
529
530    /// Mask pipes inside inline code blocks with a placeholder character.
531    ///
532    /// Backticks preceded by an odd number of backslashes are escaped (literal text)
533    /// and do not open or close code spans. An even number of backslashes means the
534    /// backslashes themselves are escaped, so the backtick is a real delimiter.
535    pub fn mask_pipes_in_inline_code(text: &str) -> String {
536        let mut result = String::new();
537        let chars: Vec<char> = text.chars().collect();
538        let mut i = 0;
539
540        while i < chars.len() {
541            if chars[i] == '`' {
542                // A backtick preceded by an odd number of backslashes is escaped
543                let preceding = Self::count_preceding_backslashes(&chars, i);
544                if preceding % 2 != 0 {
545                    // Escaped backtick -- treat as literal text, not a code span opener
546                    result.push(chars[i]);
547                    i += 1;
548                    continue;
549                }
550
551                // Count consecutive backticks at start
552                let start = i;
553                let mut backtick_count = 0;
554                while i < chars.len() && chars[i] == '`' {
555                    backtick_count += 1;
556                    i += 1;
557                }
558
559                // Look for matching closing backticks
560                let mut found_closing = false;
561                let mut j = i;
562
563                while j < chars.len() {
564                    if chars[j] == '`' {
565                        // Per CommonMark spec, backslash escapes do NOT work inside code
566                        // spans -- all characters including backslashes are literal. So we
567                        // do NOT check count_preceding_backslashes here (only for the
568                        // opening backtick above).
569
570                        // Count potential closing backticks
571                        let close_start = j;
572                        let mut close_count = 0;
573                        while j < chars.len() && chars[j] == '`' {
574                            close_count += 1;
575                            j += 1;
576                        }
577
578                        if close_count == backtick_count {
579                            // Found matching closing backticks
580                            found_closing = true;
581
582                            // Valid inline code - add with pipes masked
583                            result.extend(chars[start..i].iter());
584
585                            for &ch in chars.iter().take(close_start).skip(i) {
586                                if ch == '|' {
587                                    result.push('_'); // Mask pipe with underscore
588                                } else {
589                                    result.push(ch);
590                                }
591                            }
592
593                            result.extend(chars[close_start..j].iter());
594                            i = j;
595                            break;
596                        }
597                        // If not matching, continue searching (j is already past these backticks)
598                    } else {
599                        j += 1;
600                    }
601                }
602
603                if !found_closing {
604                    // No matching closing found, treat as regular text
605                    result.extend(chars[start..i].iter());
606                }
607            } else {
608                result.push(chars[i]);
609                i += 1;
610            }
611        }
612
613        result
614    }
615
616    /// Mask escaped pipes for accurate table cell parsing
617    ///
618    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
619    /// - `\|` → escaped pipe → masked (stays as cell content)
620    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
621    ///
622    /// This function only handles escaped pipes. Pipes inside inline code spans
623    /// are handled separately by `mask_pipes_in_inline_code`.
624    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
625        let mut result = String::new();
626        let chars: Vec<char> = text.chars().collect();
627        let mut i = 0;
628
629        while i < chars.len() {
630            if chars[i] == '\\' {
631                if i + 1 < chars.len() && chars[i + 1] == '\\' {
632                    // Escaped backslash: \\ → push both and continue
633                    // The next character (if it's a pipe) will be a real delimiter
634                    result.push('\\');
635                    result.push('\\');
636                    i += 2;
637                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
638                    // Escaped pipe: \| → mask the pipe
639                    result.push('\\');
640                    result.push('_'); // Mask the pipe
641                    i += 2;
642                } else {
643                    // Single backslash not followed by \ or | → just push it
644                    result.push(chars[i]);
645                    i += 1;
646                }
647            } else {
648                result.push(chars[i]);
649                i += 1;
650            }
651        }
652
653        result
654    }
655
656    /// Split a table row into individual cell contents with flavor-specific behavior.
657    ///
658    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
659    /// This is the foundation for both cell counting and cell content extraction.
660    ///
661    /// Pipes inside code spans are treated as content, not cell delimiters.
662    pub fn split_table_row_with_flavor(row: &str, _flavor: crate::config::MarkdownFlavor) -> Vec<String> {
663        let trimmed = row.trim();
664
665        if !trimmed.contains('|') {
666            return Vec::new();
667        }
668
669        // First, mask escaped pipes (same for all flavors)
670        let masked = Self::mask_pipes_for_table_parsing(trimmed);
671
672        // Mask pipes inside inline code for all flavors
673        let final_masked = Self::mask_pipes_in_inline_code(&masked);
674
675        let has_leading = final_masked.starts_with('|');
676        let has_trailing = final_masked.ends_with('|');
677
678        let mut masked_content = final_masked.as_str();
679        let mut orig_content = trimmed;
680
681        if has_leading {
682            masked_content = &masked_content[1..];
683            orig_content = &orig_content[1..];
684        }
685
686        // Track whether we actually strip a trailing pipe
687        let stripped_trailing = has_trailing && !masked_content.is_empty();
688        if stripped_trailing {
689            masked_content = &masked_content[..masked_content.len() - 1];
690            orig_content = &orig_content[..orig_content.len() - 1];
691        }
692
693        // Handle edge cases for degenerate inputs
694        if masked_content.is_empty() {
695            if stripped_trailing {
696                // "||" case: two pipes with empty content between = one empty cell
697                return vec![String::new()];
698            } else {
699                // "|" case: single pipe, not a valid table row
700                return Vec::new();
701            }
702        }
703
704        let masked_parts: Vec<&str> = masked_content.split('|').collect();
705        let mut cells = Vec::new();
706        let mut pos = 0;
707
708        for masked_cell in masked_parts {
709            let cell_len = masked_cell.len();
710            let orig_cell = if pos + cell_len <= orig_content.len() {
711                &orig_content[pos..pos + cell_len]
712            } else {
713                masked_cell
714            };
715            cells.push(orig_cell.to_string());
716            pos += cell_len + 1; // +1 for the pipe delimiter
717        }
718
719        cells
720    }
721
722    /// Split a table row into individual cell contents using Standard/GFM behavior.
723    pub fn split_table_row(row: &str) -> Vec<String> {
724        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
725    }
726
727    /// Determine the pipe style of a table row
728    ///
729    /// Handles tables inside blockquotes by stripping the blockquote prefix
730    /// before analyzing the pipe style.
731    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
732        // Strip blockquote prefix if present before analyzing pipe style
733        let content = Self::strip_blockquote_prefix(line);
734        let trimmed = content.trim();
735        if !trimmed.contains('|') {
736            return None;
737        }
738
739        let has_leading = trimmed.starts_with('|');
740        let has_trailing = trimmed.ends_with('|');
741
742        match (has_leading, has_trailing) {
743            (true, true) => Some("leading_and_trailing"),
744            (true, false) => Some("leading_only"),
745            (false, true) => Some("trailing_only"),
746            (false, false) => Some("no_leading_or_trailing"),
747        }
748    }
749
750    /// Extract blockquote prefix from a line, returning (prefix, content).
751    ///
752    /// This is useful for stripping the prefix before processing, then restoring it after.
753    /// For example: `"> | H1 | H2 |"` returns `("> ", "| H1 | H2 |")`.
754    pub fn extract_blockquote_prefix(line: &str) -> (&str, &str) {
755        // Find where the actual content starts (after blockquote markers and spaces)
756        let bytes = line.as_bytes();
757        let mut pos = 0;
758
759        // Skip leading whitespace (indent before blockquote marker)
760        while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
761            pos += 1;
762        }
763
764        // If no blockquote marker, return empty prefix
765        if pos >= bytes.len() || bytes[pos] != b'>' {
766            return ("", line);
767        }
768
769        // Skip all blockquote markers and spaces
770        while pos < bytes.len() {
771            if bytes[pos] == b'>' {
772                pos += 1;
773                // Skip optional space after >
774                if pos < bytes.len() && bytes[pos] == b' ' {
775                    pos += 1;
776                }
777            } else if bytes[pos] == b' ' || bytes[pos] == b'\t' {
778                pos += 1;
779            } else {
780                break;
781            }
782        }
783
784        // Split at the position where content starts
785        (&line[..pos], &line[pos..])
786    }
787
788    /// Extract list marker prefix from a line, returning (prefix, content, content_indent).
789    ///
790    /// This handles unordered list markers (`-`, `*`, `+`) and ordered list markers (`1.`, `10)`, etc.)
791    /// Returns:
792    /// - prefix: The list marker including any leading whitespace and trailing space (e.g., "- ", "  1. ")
793    /// - content: The content after the list marker
794    /// - content_indent: The number of spaces needed for continuation lines to align with content
795    ///
796    /// For example:
797    /// - `"- | H1 | H2 |"` returns `("- ", "| H1 | H2 |", 2)`
798    /// - `"1. | H1 | H2 |"` returns `("1. ", "| H1 | H2 |", 3)`
799    /// - `"  - table"` returns `("  - ", "table", 4)`
800    ///
801    /// Returns `("", line, 0)` if the line doesn't start with a list marker.
802    pub fn extract_list_prefix(line: &str) -> (&str, &str, usize) {
803        let bytes = line.as_bytes();
804
805        // Skip leading whitespace
806        let leading_spaces = bytes.iter().take_while(|&&b| b == b' ' || b == b'\t').count();
807        let mut pos = leading_spaces;
808
809        if pos >= bytes.len() {
810            return ("", line, 0);
811        }
812
813        // Check for unordered list marker: -, *, +
814        if matches!(bytes[pos], b'-' | b'*' | b'+') {
815            pos += 1;
816
817            // Must be followed by space or tab (or end of line for marker-only lines)
818            if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
819                // Skip the space after marker if present
820                if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
821                    pos += 1;
822                }
823                let content_indent = pos;
824                return (&line[..pos], &line[pos..], content_indent);
825            }
826            // Not a list marker (e.g., "-word" or "--")
827            return ("", line, 0);
828        }
829
830        // Check for ordered list marker: digits followed by . or ) then space
831        if bytes[pos].is_ascii_digit() {
832            let digit_start = pos;
833            while pos < bytes.len() && bytes[pos].is_ascii_digit() {
834                pos += 1;
835            }
836
837            // Must have at least one digit
838            if pos > digit_start && pos < bytes.len() {
839                // Check for . or ) followed by space/tab
840                if bytes[pos] == b'.' || bytes[pos] == b')' {
841                    pos += 1;
842                    if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
843                        // Skip the space after marker if present
844                        if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
845                            pos += 1;
846                        }
847                        let content_indent = pos;
848                        return (&line[..pos], &line[pos..], content_indent);
849                    }
850                }
851            }
852        }
853
854        ("", line, 0)
855    }
856
857    /// Extract the table row content from a line, stripping any list/blockquote prefix.
858    ///
859    /// This is useful for processing table rows that may be inside list items or blockquotes.
860    /// The line_index indicates which line of the table this is (0 = header, 1 = delimiter, etc.)
861    pub fn extract_table_row_content<'a>(line: &'a str, table_block: &TableBlock, line_index: usize) -> &'a str {
862        // First strip blockquote prefix
863        let (_, after_blockquote) = Self::extract_blockquote_prefix(line);
864
865        // Then handle list prefix if present
866        if let Some(ref list_ctx) = table_block.list_context {
867            if line_index == 0 {
868                // Header line: strip list prefix (handles both markers and indentation)
869                after_blockquote
870                    .strip_prefix(&list_ctx.list_prefix)
871                    .unwrap_or_else(|| Self::extract_list_prefix(after_blockquote).1)
872            } else {
873                // Continuation lines: strip indentation
874                Self::strip_list_continuation_indent(after_blockquote, list_ctx.content_indent)
875            }
876        } else {
877            after_blockquote
878        }
879    }
880
881    /// Check if the content after a list marker looks like a table row.
882    /// This is used to detect tables that start on the same line as a list marker.
883    pub fn is_list_item_with_table_row(line: &str) -> bool {
884        let (prefix, content, _) = Self::extract_list_prefix(line);
885        if prefix.is_empty() {
886            return false;
887        }
888
889        // Check if the content after the list marker is a table row
890        // It must start with | (proper table format within a list)
891        let trimmed = content.trim();
892        if !trimmed.starts_with('|') {
893            return false;
894        }
895
896        // Use our table row detection on the content
897        Self::is_potential_table_row_content(content)
898    }
899
900    /// Internal helper: Check if content (without list/blockquote prefix) looks like a table row.
901    fn is_potential_table_row_content(content: &str) -> bool {
902        Self::is_potential_table_row(content)
903    }
904}
905
906#[cfg(test)]
907mod tests {
908    use super::*;
909    use crate::lint_context::LintContext;
910
911    #[test]
912    fn test_is_potential_table_row() {
913        // Basic valid table rows
914        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
915        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
916        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
917        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
918
919        // Multiple cells
920        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
921
922        // With whitespace
923        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
924        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
925
926        // Not table rows
927        assert!(!TableUtils::is_potential_table_row("- List item"));
928        assert!(!TableUtils::is_potential_table_row("* Another list"));
929        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
930        assert!(!TableUtils::is_potential_table_row("Regular text"));
931        assert!(!TableUtils::is_potential_table_row(""));
932        assert!(!TableUtils::is_potential_table_row("   "));
933
934        // Code blocks
935        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
936        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
937        assert!(!TableUtils::is_potential_table_row("Use ``a|b`` in prose"));
938        assert!(TableUtils::is_potential_table_row("| `fenced` | Uses ``` and ~~~ |"));
939        assert!(TableUtils::is_potential_table_row("`!foo && bar` | `(!foo) && bar`"));
940        assert!(!TableUtils::is_potential_table_row("`echo a | sed 's/a/b/'`"));
941
942        // Single pipe not enough
943        assert!(!TableUtils::is_potential_table_row("Just one |"));
944        assert!(!TableUtils::is_potential_table_row("| Just one"));
945
946        // Very long cells are valid in tables (no length limit for cell content)
947        let long_cell = "a".repeat(150);
948        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
949
950        // Cells with newlines
951        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
952
953        // Empty cells (Issue #129)
954        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
955        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
956        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
957    }
958
959    #[test]
960    fn test_list_items_with_pipes_not_table_rows() {
961        // Ordered list items should NOT be detected as table rows
962        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
963        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
964        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
965        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
966        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
967
968        // Unordered list items with tabs
969        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
970        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
971        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
972
973        // Indented list items (the trim_start normalizes indentation)
974        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
975        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
976        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
977
978        // Task list items
979        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
980        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
981
982        // Multiple pipes in list items
983        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
984        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
985
986        // These SHOULD still be detected as potential table rows
987        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
988        assert!(TableUtils::is_potential_table_row("cell | cell"));
989        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
990    }
991
992    #[test]
993    fn test_atx_headings_with_pipes_not_table_rows() {
994        // All 6 ATX heading levels with pipes
995        assert!(!TableUtils::is_potential_table_row("# Heading | with pipe"));
996        assert!(!TableUtils::is_potential_table_row("## Heading | with pipe"));
997        assert!(!TableUtils::is_potential_table_row("### Heading | with pipe"));
998        assert!(!TableUtils::is_potential_table_row("#### Heading | with pipe"));
999        assert!(!TableUtils::is_potential_table_row("##### Heading | with pipe"));
1000        assert!(!TableUtils::is_potential_table_row("###### Heading | with pipe"));
1001
1002        // Multiple pipes in headings
1003        assert!(!TableUtils::is_potential_table_row("### col1 | col2 | col3"));
1004        assert!(!TableUtils::is_potential_table_row("## a|b|c"));
1005
1006        // Headings with tab after hashes
1007        assert!(!TableUtils::is_potential_table_row("#\tHeading | pipe"));
1008        assert!(!TableUtils::is_potential_table_row("##\tHeading | pipe"));
1009
1010        // Heading with only hashes and pipe (empty heading text)
1011        assert!(!TableUtils::is_potential_table_row("# |"));
1012        assert!(!TableUtils::is_potential_table_row("## |"));
1013
1014        // Indented headings (spaces before #)
1015        assert!(!TableUtils::is_potential_table_row("  ## Heading | pipe"));
1016        assert!(!TableUtils::is_potential_table_row("   ### Heading | pipe"));
1017
1018        // Unicode content in headings (the original proptest failure case)
1019        assert!(!TableUtils::is_potential_table_row("#### ®aAA|ᯗ"));
1020
1021        // 7+ hashes are NOT headings — should follow normal table detection
1022        // "####### text|pipe" has no space after 7 hashes if treated as non-heading
1023        // but with a space it still has 7+ hashes so not a heading
1024        assert!(TableUtils::is_potential_table_row("####### text | pipe"));
1025
1026        // Hash without space is NOT a heading, so pipe detection applies
1027        assert!(TableUtils::is_potential_table_row("#nospc|pipe"));
1028
1029        // These SHOULD still be detected as potential table rows
1030        assert!(TableUtils::is_potential_table_row("| # Header | Value |"));
1031        assert!(TableUtils::is_potential_table_row("text | #tag"));
1032    }
1033
1034    #[test]
1035    fn test_is_delimiter_row() {
1036        // Basic delimiter rows
1037        assert!(TableUtils::is_delimiter_row("|---|---|"));
1038        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
1039        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
1040        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
1041
1042        // With varying dash counts
1043        assert!(TableUtils::is_delimiter_row("|-|--|"));
1044        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
1045
1046        // With whitespace
1047        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
1048        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
1049
1050        // Multiple columns
1051        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
1052
1053        // Without leading/trailing pipes
1054        assert!(TableUtils::is_delimiter_row("--- | ---"));
1055        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
1056
1057        // Not delimiter rows
1058        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
1059        assert!(!TableUtils::is_delimiter_row("Regular text"));
1060        assert!(!TableUtils::is_delimiter_row(""));
1061        assert!(!TableUtils::is_delimiter_row("|||"));
1062        assert!(!TableUtils::is_delimiter_row("| | |"));
1063
1064        // Must have dashes
1065        assert!(!TableUtils::is_delimiter_row("| : | : |"));
1066        assert!(!TableUtils::is_delimiter_row("|    |    |"));
1067
1068        // Mixed content
1069        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
1070        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
1071    }
1072
1073    #[test]
1074    fn test_count_cells() {
1075        // Basic counts
1076        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
1077        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
1078        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
1079        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
1080
1081        // Single cell
1082        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
1083        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
1084
1085        // Empty cells
1086        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
1087        assert_eq!(TableUtils::count_cells("| | | |"), 3);
1088
1089        // Many cells
1090        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
1091
1092        // Edge cases
1093        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
1094        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
1095
1096        // No table
1097        assert_eq!(TableUtils::count_cells("Regular text"), 0);
1098        assert_eq!(TableUtils::count_cells(""), 0);
1099        assert_eq!(TableUtils::count_cells("   "), 0);
1100
1101        // Whitespace handling
1102        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
1103        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
1104    }
1105
1106    #[test]
1107    fn test_count_cells_with_escaped_pipes() {
1108        // Pipes inside code spans are treated as content, not cell delimiters.
1109        // To include a literal pipe outside code spans, escape it with \|.
1110
1111        // Basic table structure
1112        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
1113        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
1114        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
1115
1116        // Escaped pipes: \| keeps the pipe as content
1117        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
1118        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
1119
1120        // Escaped pipes inside backticks
1121        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
1122
1123        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
1124        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
1125        // Double backslash inside backticks: pipe is still masked by code span
1126        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 2);
1127
1128        // Pipes inside code spans are content, not delimiters
1129        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 2);
1130        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 2);
1131        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 1);
1132
1133        // Regex example - pipes in code spans are masked
1134        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 2);
1135        // Escaped pipe inside code is also masked (escape is redundant here)
1136        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
1137    }
1138
1139    #[test]
1140    fn test_determine_pipe_style() {
1141        // All pipe styles
1142        assert_eq!(
1143            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
1144            Some("leading_and_trailing")
1145        );
1146        assert_eq!(
1147            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
1148            Some("leading_only")
1149        );
1150        assert_eq!(
1151            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
1152            Some("trailing_only")
1153        );
1154        assert_eq!(
1155            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
1156            Some("no_leading_or_trailing")
1157        );
1158
1159        // With whitespace
1160        assert_eq!(
1161            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
1162            Some("leading_and_trailing")
1163        );
1164        assert_eq!(
1165            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
1166            Some("leading_only")
1167        );
1168
1169        // No pipes
1170        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
1171        assert_eq!(TableUtils::determine_pipe_style(""), None);
1172        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1173
1174        // Single pipe cases
1175        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
1176        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
1177        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
1178    }
1179
1180    #[test]
1181    fn test_find_table_blocks_simple() {
1182        let content = "| Header 1 | Header 2 |
1183|-----------|-----------|
1184| Cell 1    | Cell 2    |
1185| Cell 3    | Cell 4    |";
1186
1187        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1188
1189        let tables = TableUtils::find_table_blocks(content, &ctx);
1190        assert_eq!(tables.len(), 1);
1191
1192        let table = &tables[0];
1193        assert_eq!(table.start_line, 0);
1194        assert_eq!(table.end_line, 3);
1195        assert_eq!(table.header_line, 0);
1196        assert_eq!(table.delimiter_line, 1);
1197        assert_eq!(table.content_lines, vec![2, 3]);
1198    }
1199
1200    #[test]
1201    fn test_find_table_blocks_multiple() {
1202        let content = "Some text
1203
1204| Table 1 | Col A |
1205|----------|-------|
1206| Data 1   | Val 1 |
1207
1208More text
1209
1210| Table 2 | Col 2 |
1211|----------|-------|
1212| Data 2   | Data  |";
1213
1214        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1215
1216        let tables = TableUtils::find_table_blocks(content, &ctx);
1217        assert_eq!(tables.len(), 2);
1218
1219        // First table
1220        assert_eq!(tables[0].start_line, 2);
1221        assert_eq!(tables[0].end_line, 4);
1222        assert_eq!(tables[0].header_line, 2);
1223        assert_eq!(tables[0].delimiter_line, 3);
1224        assert_eq!(tables[0].content_lines, vec![4]);
1225
1226        // Second table
1227        assert_eq!(tables[1].start_line, 8);
1228        assert_eq!(tables[1].end_line, 10);
1229        assert_eq!(tables[1].header_line, 8);
1230        assert_eq!(tables[1].delimiter_line, 9);
1231        assert_eq!(tables[1].content_lines, vec![10]);
1232    }
1233
1234    #[test]
1235    fn test_find_table_blocks_no_content_rows() {
1236        let content = "| Header 1 | Header 2 |
1237|-----------|-----------|
1238
1239Next paragraph";
1240
1241        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1242
1243        let tables = TableUtils::find_table_blocks(content, &ctx);
1244        assert_eq!(tables.len(), 1);
1245
1246        let table = &tables[0];
1247        assert_eq!(table.start_line, 0);
1248        assert_eq!(table.end_line, 1); // Just header and delimiter
1249        assert_eq!(table.content_lines.len(), 0);
1250    }
1251
1252    #[test]
1253    fn test_find_table_blocks_in_code_block() {
1254        let content = "```
1255| Not | A | Table |
1256|-----|---|-------|
1257| In  | Code | Block |
1258```
1259
1260| Real | Table |
1261|------|-------|
1262| Data | Here  |";
1263
1264        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1265
1266        let tables = TableUtils::find_table_blocks(content, &ctx);
1267        assert_eq!(tables.len(), 1); // Only the table outside code block
1268
1269        let table = &tables[0];
1270        assert_eq!(table.header_line, 6);
1271        assert_eq!(table.delimiter_line, 7);
1272    }
1273
1274    #[test]
1275    fn test_find_table_blocks_no_tables() {
1276        let content = "Just regular text
1277No tables here
1278- List item with | pipe
1279* Another list item";
1280
1281        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1282
1283        let tables = TableUtils::find_table_blocks(content, &ctx);
1284        assert_eq!(tables.len(), 0);
1285    }
1286
1287    #[test]
1288    fn test_find_table_blocks_malformed() {
1289        let content = "| Header without delimiter |
1290| This looks like table |
1291But no delimiter row
1292
1293| Proper | Table |
1294|---------|-------|
1295| Data    | Here  |";
1296
1297        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1298
1299        let tables = TableUtils::find_table_blocks(content, &ctx);
1300        assert_eq!(tables.len(), 1); // Only the proper table
1301        assert_eq!(tables[0].header_line, 4);
1302    }
1303
1304    #[test]
1305    fn test_edge_cases() {
1306        // Test empty content
1307        assert!(!TableUtils::is_potential_table_row(""));
1308        assert!(!TableUtils::is_delimiter_row(""));
1309        assert_eq!(TableUtils::count_cells(""), 0);
1310        assert_eq!(TableUtils::determine_pipe_style(""), None);
1311
1312        // Test whitespace only
1313        assert!(!TableUtils::is_potential_table_row("   "));
1314        assert!(!TableUtils::is_delimiter_row("   "));
1315        assert_eq!(TableUtils::count_cells("   "), 0);
1316        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1317
1318        // Test single character
1319        assert!(!TableUtils::is_potential_table_row("|"));
1320        assert!(!TableUtils::is_delimiter_row("|"));
1321        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
1322
1323        // Test very long lines are valid table rows (no length limit)
1324        // Test both single-column and multi-column long lines
1325        let long_single = format!("| {} |", "a".repeat(200));
1326        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
1327
1328        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
1329        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
1330
1331        // Test unicode
1332        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
1333        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
1334        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
1335    }
1336
1337    #[test]
1338    fn test_table_block_struct() {
1339        let block = TableBlock {
1340            start_line: 0,
1341            end_line: 5,
1342            header_line: 0,
1343            delimiter_line: 1,
1344            content_lines: vec![2, 3, 4, 5],
1345            list_context: None,
1346        };
1347
1348        // Test Debug trait
1349        let debug_str = format!("{block:?}");
1350        assert!(debug_str.contains("TableBlock"));
1351        assert!(debug_str.contains("start_line: 0"));
1352
1353        // Test Clone trait
1354        let cloned = block.clone();
1355        assert_eq!(cloned.start_line, block.start_line);
1356        assert_eq!(cloned.end_line, block.end_line);
1357        assert_eq!(cloned.header_line, block.header_line);
1358        assert_eq!(cloned.delimiter_line, block.delimiter_line);
1359        assert_eq!(cloned.content_lines, block.content_lines);
1360        assert!(cloned.list_context.is_none());
1361    }
1362
1363    #[test]
1364    fn test_split_table_row() {
1365        // Basic split
1366        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
1367        assert_eq!(cells.len(), 3);
1368        assert_eq!(cells[0].trim(), "Cell 1");
1369        assert_eq!(cells[1].trim(), "Cell 2");
1370        assert_eq!(cells[2].trim(), "Cell 3");
1371
1372        // Without trailing pipe
1373        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
1374        assert_eq!(cells.len(), 2);
1375
1376        // Empty cells
1377        let cells = TableUtils::split_table_row("| | | |");
1378        assert_eq!(cells.len(), 3);
1379
1380        // Single cell
1381        let cells = TableUtils::split_table_row("| Cell |");
1382        assert_eq!(cells.len(), 1);
1383        assert_eq!(cells[0].trim(), "Cell");
1384
1385        // No pipes
1386        let cells = TableUtils::split_table_row("No pipes here");
1387        assert_eq!(cells.len(), 0);
1388    }
1389
1390    #[test]
1391    fn test_split_table_row_with_escaped_pipes() {
1392        // Escaped pipes should be preserved in cell content
1393        let cells = TableUtils::split_table_row(r"| A | B \| C |");
1394        assert_eq!(cells.len(), 2);
1395        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
1396
1397        // Double backslash + pipe is NOT escaped
1398        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
1399        assert_eq!(cells.len(), 3);
1400    }
1401
1402    #[test]
1403    fn test_split_table_row_with_flavor_mkdocs() {
1404        // MkDocs flavor: pipes in inline code are NOT cell delimiters
1405        let cells =
1406            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
1407        assert_eq!(cells.len(), 2);
1408        assert!(
1409            cells[1].contains("`x | y`"),
1410            "Inline code with pipe should be single cell in MkDocs flavor"
1411        );
1412
1413        // Multiple pipes in inline code
1414        let cells =
1415            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
1416        assert_eq!(cells.len(), 2);
1417        assert!(cells[1].contains("`a | b | c`"));
1418    }
1419
1420    #[test]
1421    fn test_split_table_row_with_flavor_standard() {
1422        // Pipes in inline code are NOT cell delimiters for any flavor
1423        let cells =
1424            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
1425        assert_eq!(
1426            cells.len(),
1427            2,
1428            "Pipes in code spans should not be cell delimiters, got {cells:?}"
1429        );
1430        assert!(
1431            cells[1].contains("`x | y`"),
1432            "Inline code with pipe should be single cell"
1433        );
1434    }
1435
1436    // === extract_blockquote_prefix tests ===
1437
1438    #[test]
1439    fn test_extract_blockquote_prefix_no_blockquote() {
1440        // Regular table row without blockquote
1441        let (prefix, content) = TableUtils::extract_blockquote_prefix("| H1 | H2 |");
1442        assert_eq!(prefix, "");
1443        assert_eq!(content, "| H1 | H2 |");
1444    }
1445
1446    #[test]
1447    fn test_extract_blockquote_prefix_single_level() {
1448        // Single blockquote level
1449        let (prefix, content) = TableUtils::extract_blockquote_prefix("> | H1 | H2 |");
1450        assert_eq!(prefix, "> ");
1451        assert_eq!(content, "| H1 | H2 |");
1452    }
1453
1454    #[test]
1455    fn test_extract_blockquote_prefix_double_level() {
1456        // Double blockquote level
1457        let (prefix, content) = TableUtils::extract_blockquote_prefix(">> | H1 | H2 |");
1458        assert_eq!(prefix, ">> ");
1459        assert_eq!(content, "| H1 | H2 |");
1460    }
1461
1462    #[test]
1463    fn test_extract_blockquote_prefix_triple_level() {
1464        // Triple blockquote level
1465        let (prefix, content) = TableUtils::extract_blockquote_prefix(">>> | H1 | H2 |");
1466        assert_eq!(prefix, ">>> ");
1467        assert_eq!(content, "| H1 | H2 |");
1468    }
1469
1470    #[test]
1471    fn test_extract_blockquote_prefix_with_spaces() {
1472        // Blockquote with spaces between markers
1473        let (prefix, content) = TableUtils::extract_blockquote_prefix("> > | H1 | H2 |");
1474        assert_eq!(prefix, "> > ");
1475        assert_eq!(content, "| H1 | H2 |");
1476    }
1477
#[test]
fn test_extract_blockquote_prefix_indented() {
    // Leading indentation before the `>` marker is expected to be part of
    // the returned prefix.
    let row = "  > | H1 | H2 |";
    let split = TableUtils::extract_blockquote_prefix(row);
    assert_eq!(split.0, "  > ");
    assert_eq!(split.1, "| H1 | H2 |");
}
1485
#[test]
fn test_extract_blockquote_prefix_no_space_after() {
    // With no space after `>`, the prefix is expected to be the bare marker
    // and the content to start right at the first pipe.
    let row = ">| H1 | H2 |";
    let (quote_prefix, remainder) = TableUtils::extract_blockquote_prefix(row);
    assert_eq!(quote_prefix, ">");
    assert_eq!(remainder, "| H1 | H2 |");
}
1493
#[test]
fn test_determine_pipe_style_in_blockquote() {
    // Rows nested one to three blockquote levels deep, paired with the pipe
    // style expected for the row content that follows the `>` markers.
    let cases = [
        ("> | H1 | H2 |", Some("leading_and_trailing")),
        ("> H1 | H2", Some("no_leading_or_trailing")),
        (">> | H1 | H2 |", Some("leading_and_trailing")),
        (">>> | H1 | H2", Some("leading_only")),
    ];

    // Cases are checked in declaration order, so the first mismatch reported
    // is the same one the sequential assertions would report.
    for &(line, expected_style) in &cases {
        assert_eq!(TableUtils::determine_pipe_style(line), expected_style);
    }
}
1511
#[test]
fn test_list_table_delimiter_requires_indentation() {
    // The first line is a list item that merely contains a pipe; the
    // delimiter row below it starts at the left margin with no indentation.
    // That layout must not be classified as a table nested inside the list
    // item: one table is still expected, but without any list context, so it
    // is not treated as a list-table for column-count purposes.
    let markdown = "- List item with | pipe\n|---|---|\n| Cell 1 | Cell 2 |";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let found = TableUtils::find_table_blocks(markdown, &lint_ctx);

    assert_eq!(found.len(), 1, "Should find exactly one table");

    let only_table = &found[0];
    assert!(
        only_table.list_context.is_none(),
        "Should NOT have list context since delimiter has no indentation"
    );
}
1530
#[test]
fn test_list_table_with_properly_indented_delimiter() {
    // The header row sits on the list item line and the delimiter and body
    // rows are indented to line up with the item's content. This layout is
    // expected to be reported as a single list-table starting at line 0.
    let markdown = "- | Header 1 | Header 2 |\n  |----------|----------|\n  | Cell 1   | Cell 2   |";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
    let found = TableUtils::find_table_blocks(markdown, &lint_ctx);

    assert_eq!(found.len(), 1, "Should find exactly one table");

    let list_table = &found[0];
    assert_eq!(list_table.start_line, 0, "Table should start at list item line");
    assert!(
        list_table.list_context.is_some(),
        "Should be a list table since delimiter is properly indented"
    );
}
1547
#[test]
fn test_mask_pipes_in_inline_code_regular_backticks() {
    // A pipe enclosed by an ordinary backtick code span is expected to be
    // replaced with `_`; the cell-delimiting pipes stay as they are.
    let row = "| `code | here` |";
    let masked = TableUtils::mask_pipes_in_inline_code(row);
    assert_eq!(masked, "| `code _ here` |");
}
1554
#[test]
fn test_mask_pipes_in_inline_code_escaped_backtick_not_code_span() {
    // A backslash-escaped backtick is literal text and does not open a code
    // span, so every pipe in the row is expected to survive unmasked.
    let row = r"| \`not code | still pipe\` |";
    let masked = TableUtils::mask_pipes_in_inline_code(row);
    assert_eq!(masked, r"| \`not code | still pipe\` |");
}
1562
#[test]
fn test_mask_pipes_in_inline_code_escaped_backslash_then_backtick() {
    // Two backslashes form an escaped backslash, which leaves the backtick
    // after them unescaped: it opens a real code span. The pipe between the
    // two backticks is therefore expected to be masked.
    let row = r"| \\`real code | masked\\` |";
    let masked = TableUtils::mask_pipes_in_inline_code(row);
    assert_eq!(masked, r"| \\`real code _ masked\\` |");
}
1572
#[test]
fn test_mask_pipes_in_inline_code_triple_backslash_before_backtick() {
    // An odd run of backslashes (three) leaves the backtick escaped, so no
    // code span starts and the row is expected to come back unchanged.
    let row = r"| \\\`not code | pipe\\\` |";
    let masked = TableUtils::mask_pipes_in_inline_code(row);
    assert_eq!(masked, r"| \\\`not code | pipe\\\` |");
}
1579
#[test]
fn test_mask_pipes_in_inline_code_four_backslashes_before_backtick() {
    // An even run of backslashes (four) leaves the backtick unescaped, so it
    // delimits a code span and the pipe inside is expected to be masked.
    let row = r"| \\\\`code | here\\\\` |";
    let masked = TableUtils::mask_pipes_in_inline_code(row);
    assert_eq!(masked, r"| \\\\`code _ here\\\\` |");
}
1586
#[test]
fn test_mask_pipes_in_inline_code_no_backslash() {
    // Baseline without any backslashes: the pipe inside the code span is
    // expected to be masked and the surrounding prose left untouched.
    let text = "before `a | b` after";
    let masked = TableUtils::mask_pipes_in_inline_code(text);
    assert_eq!(masked, "before `a _ b` after");
}
1593
#[test]
fn test_mask_pipes_in_inline_code_no_code_span() {
    // Without any backticks there is no code span, so the row is expected
    // to come back with every pipe intact.
    let row = "| col1 | col2 |";
    let masked = TableUtils::mask_pipes_in_inline_code(row);
    assert_eq!(masked, "| col1 | col2 |");
}
1600
#[test]
fn test_mask_pipes_in_inline_code_backslash_before_closing_backtick() {
    // CommonMark: backslash escapes have no effect inside a code span, so a
    // `\` there is an ordinary character. In the row below the span opens at
    // the first backtick, contains "foo\", and closes at the second backtick
    // even though a backslash sits directly in front of it.
    //
    // The span itself holds no pipe, and the pipe that follows it is a real
    // cell delimiter. Nothing is expected to be masked: the output equals
    // the input.
    let row = r"| `foo\` | bar |";
    let masked = TableUtils::mask_pipes_in_inline_code(row);
    assert_eq!(masked, r"| `foo\` | bar |");
}
1616
#[test]
fn test_mask_pipes_in_inline_code_backslash_literal_with_pipe_inside() {
    // The code span holds a backslash immediately followed by a pipe. Inside
    // a code span the backslash is literal (CommonMark), so it is kept as-is,
    // while the pipe, being inside the span, is expected to be masked.
    let row = r"| `a\|b` | col2 |";
    let masked = TableUtils::mask_pipes_in_inline_code(row);
    assert_eq!(masked, r"| `a\_b` | col2 |");
}
1625
#[test]
fn test_count_preceding_backslashes() {
    // Collects `text` into a char vector and counts the backslashes that sit
    // immediately before char index `pos`.
    let backslashes_before = |text: &str, pos: usize| {
        let symbols: Vec<char> = text.chars().collect();
        TableUtils::count_preceding_backslashes(&symbols, pos)
    };

    // a=0, b=1, c=2, \=3, \=4, \=5, `=6: three backslashes precede the backtick.
    assert_eq!(backslashes_before(r"abc\\\`def", 6), 3);

    // The backtick is at index 5, preceded by two backslashes.
    assert_eq!(backslashes_before(r"abc\\`def", 5), 2);

    // The backtick is the very first char, so nothing precedes it.
    assert_eq!(backslashes_before("`def", 0), 0);
}
1640
#[test]
fn test_has_unescaped_pipe_backslash_literal_in_code_span() {
    // Short local alias for the function under test.
    let pipe_outside_code = TableUtils::has_unescaped_pipe_outside_inline_code;

    // CommonMark treats backslashes inside a code span as literal, so the
    // span `foo\` is complete and the pipe that follows lies outside code.
    assert!(pipe_outside_code(r"`foo\` | bar"));

    // Outside a code span, \` is an escaped backtick and opens nothing, so
    // the pipe still counts.
    assert!(pipe_outside_code(r"\`foo | bar\`"));

    // A pipe fully enclosed by a code span must not count.
    assert!(!pipe_outside_code(r"`foo | bar`"));
}
1653
#[test]
fn test_table_after_code_span_detected() {
    // An inline code span on its own line, a blank line, then a complete
    // table: the table is still expected to show up in the context.
    let markdown = "`code`\n\n| A | B |\n|---|---|\n| 1 | 2 |\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);

    let found_any_table = !lint_ctx.table_blocks.is_empty();
    assert!(found_any_table, "Table after code span should be detected");
}
1662
#[test]
fn test_table_inside_html_comment_not_detected() {
    // The whole table sits between `<!--` and `-->`, so no table block is
    // expected to be recorded in the context.
    let markdown = "<!--\n| A | B |\n|---|---|\n| 1 | 2 |\n-->\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);

    let no_tables = lint_ctx.table_blocks.is_empty();
    assert!(no_tables, "Table inside HTML comment should not be detected");
}
1674}