Skip to main content

rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).

/// Represents a table block in the document.
///
/// Values are produced by `TableUtils::find_table_blocks_with_code_info`. Every line
/// field is a 0-based index into the sequence yielded by `content.lines()`.
#[derive(Debug, Clone)]
pub struct TableBlock {
    /// Index of the first line of the table (the same line as `header_line`).
    pub start_line: usize,
    /// Index of the last line of the table: the last body row, or the delimiter
    /// row when the table has no body rows.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row (always `header_line + 1`).
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in document order.
    pub content_lines: Vec<usize>,
    /// If the table is inside a list item, this contains:
    /// - The list marker prefix for the header line (e.g., "- ", "1. ")
    /// - The content indent (number of spaces for continuation lines)
    ///
    /// `None` for tables that are not part of a list item.
    pub list_context: Option<ListTableContext>,
}
18
/// Context information for tables inside list items.
///
/// Attached to a `TableBlock` whose header row either shares a line with a list
/// marker or is an indented continuation line of a list item.
#[derive(Debug, Clone)]
pub struct ListTableContext {
    /// The list marker prefix including any leading whitespace (e.g., "- ", "  1. ").
    ///
    /// When the table starts on a continuation line (no marker on the header line),
    /// this is a run of `content_indent` spaces instead of a marker.
    pub list_prefix: String,
    /// Number of spaces for continuation lines to align with content
    pub content_indent: usize,
}
27
/// Shared table detection utilities.
///
/// A field-less namespace type: all functionality is exposed as associated
/// functions (e.g. `TableUtils::is_delimiter_row`), so no instance is needed.
pub struct TableUtils;
30
31impl TableUtils {
32    /// Check if a line looks like a potential table row
33    pub fn is_potential_table_row(line: &str) -> bool {
34        let trimmed = line.trim();
35        if trimmed.is_empty() || !trimmed.contains('|') {
36            return false;
37        }
38
39        // Skip lines that are clearly not table rows
40        // Unordered list items with space or tab after marker
41        if trimmed.starts_with("- ")
42            || trimmed.starts_with("* ")
43            || trimmed.starts_with("+ ")
44            || trimmed.starts_with("-\t")
45            || trimmed.starts_with("*\t")
46            || trimmed.starts_with("+\t")
47        {
48            return false;
49        }
50
51        // Skip ordered list items: digits followed by . or ) then space/tab
52        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
53            && first_non_digit > 0
54        {
55            let after_digits = &trimmed[first_non_digit..];
56            if after_digits.starts_with(". ")
57                || after_digits.starts_with(".\t")
58                || after_digits.starts_with(") ")
59                || after_digits.starts_with(")\t")
60            {
61                return false;
62            }
63        }
64
65        // Skip ATX headings (# through ######)
66        if trimmed.starts_with('#') {
67            let hash_count = trimmed.bytes().take_while(|&b| b == b'#').count();
68            if hash_count <= 6 {
69                let after_hashes = &trimmed[hash_count..];
70                if after_hashes.is_empty() || after_hashes.starts_with(' ') || after_hashes.starts_with('\t') {
71                    return false;
72                }
73            }
74        }
75
76        // Skip lines that are clearly code or inline code
77        if trimmed.starts_with("`") || trimmed.contains("``") {
78            return false;
79        }
80
81        // Must have at least 2 parts when split by |
82        let parts: Vec<&str> = trimmed.split('|').collect();
83        if parts.len() < 2 {
84            return false;
85        }
86
87        // Check if it looks like a table row by having reasonable content between pipes
88        let mut valid_parts = 0;
89        let mut total_non_empty_parts = 0;
90
91        for part in &parts {
92            let part_trimmed = part.trim();
93            // Skip empty parts (from leading/trailing pipes)
94            if part_trimmed.is_empty() {
95                continue;
96            }
97            total_non_empty_parts += 1;
98
99            // Count parts that look like table cells (reasonable content, no newlines)
100            if !part_trimmed.contains('\n') {
101                valid_parts += 1;
102            }
103        }
104
105        // Check if all non-empty parts are valid (no newlines)
106        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
107            // Some cells contain newlines, not a valid table row
108            return false;
109        }
110
111        // GFM allows tables with all empty cells (e.g., |||)
112        // These are valid if they have proper table formatting (leading and trailing pipes)
113        if total_non_empty_parts == 0 {
114            // Empty cells are only valid with proper pipe formatting
115            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
116        }
117
118        // GFM allows single-column tables, so >= 1 valid part is enough
119        // when the line has proper table formatting (pipes)
120        if trimmed.starts_with('|') && trimmed.ends_with('|') {
121            // Properly formatted table row with pipes on both ends
122            valid_parts >= 1
123        } else {
124            // For rows without proper pipe formatting, require at least 2 cells
125            valid_parts >= 2
126        }
127    }
128
129    /// Check if a line is a table delimiter row (e.g., |---|---|)
130    pub fn is_delimiter_row(line: &str) -> bool {
131        let trimmed = line.trim();
132        if !trimmed.contains('|') || !trimmed.contains('-') {
133            return false;
134        }
135
136        // Split by pipes and check each part
137        let parts: Vec<&str> = trimmed.split('|').collect();
138        let mut valid_delimiter_parts = 0;
139        let mut total_non_empty_parts = 0;
140
141        for part in &parts {
142            let part_trimmed = part.trim();
143            if part_trimmed.is_empty() {
144                continue; // Skip empty parts from leading/trailing pipes
145            }
146
147            total_non_empty_parts += 1;
148
149            // Check if this part looks like a delimiter (contains dashes and optionally colons)
150            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
151                valid_delimiter_parts += 1;
152            }
153        }
154
155        // All non-empty parts must be valid delimiters, and there must be at least one
156        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
157    }
158
159    /// Strip blockquote prefix from a line, returning the content without the prefix
160    fn strip_blockquote_prefix(line: &str) -> &str {
161        let trimmed = line.trim_start();
162        if trimmed.starts_with('>') {
163            // Strip all blockquote markers and following space
164            let mut rest = trimmed;
165            while rest.starts_with('>') {
166                rest = rest.strip_prefix('>').unwrap_or(rest);
167                rest = rest.trim_start_matches(' ');
168            }
169            rest
170        } else {
171            line
172        }
173    }
174
175    /// Find all table blocks in the content with optimized detection
176    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
177    pub fn find_table_blocks_with_code_info(
178        content: &str,
179        code_blocks: &[(usize, usize)],
180        code_spans: &[crate::lint_context::CodeSpan],
181        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
182    ) -> Vec<TableBlock> {
183        let lines: Vec<&str> = content.lines().collect();
184        let mut tables = Vec::new();
185        let mut i = 0;
186
187        // Pre-compute line positions for efficient code block checking
188        let mut line_positions = Vec::with_capacity(lines.len());
189        let mut pos = 0;
190        for line in &lines {
191            line_positions.push(pos);
192            pos += line.len() + 1; // +1 for newline
193        }
194
195        // Stack of active list content indents for continuation table tracking.
196        // Supports nested lists: when a child list is seen, we push; when we
197        // dedent past a level, we pop back to the enclosing list.
198        let mut list_indent_stack: Vec<usize> = Vec::new();
199
200        while i < lines.len() {
201            // Skip lines in code blocks, code spans, or HTML comments
202            let line_start = line_positions[i];
203            let in_code =
204                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
205                    || code_spans
206                        .iter()
207                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
208            let in_html_comment = html_comment_ranges
209                .iter()
210                .any(|range| line_start >= range.start && line_start < range.end);
211
212            if in_code || in_html_comment {
213                i += 1;
214                continue;
215            }
216
217            // Strip blockquote prefix for table detection
218            let line_content = Self::strip_blockquote_prefix(lines[i]);
219
220            // Update active list tracking
221            let (list_prefix, list_content, content_indent) = Self::extract_list_prefix(line_content);
222            if !list_prefix.is_empty() {
223                // Line has a list marker. Pop any deeper/equal levels, then push this one.
224                while list_indent_stack.last().is_some_and(|&top| top >= content_indent) {
225                    list_indent_stack.pop();
226                }
227                list_indent_stack.push(content_indent);
228            } else if !line_content.trim().is_empty() {
229                // Non-blank line without a marker: pop any levels we've dedented past
230                let leading = line_content.len() - line_content.trim_start().len();
231                while list_indent_stack.last().is_some_and(|&top| leading < top) {
232                    list_indent_stack.pop();
233                }
234            }
235            // Blank lines keep the stack unchanged (blank lines don't end list items)
236
237            // Check if this is a list item that contains a table row on the same line,
238            // or a continuation table indented under an active list item
239            let (is_same_line_list_table, effective_content) =
240                if !list_prefix.is_empty() && Self::is_potential_table_row_content(list_content) {
241                    (true, list_content)
242                } else {
243                    (false, line_content)
244                };
245
246            // Detect continuation list tables: no marker on this line, but indented
247            // under an active list item (e.g., "- Text\n  | h1 | h2 |")
248            let continuation_indent = if !is_same_line_list_table && list_prefix.is_empty() {
249                let leading = line_content.len() - line_content.trim_start().len();
250                // Find the deepest list level this line is indented under
251                list_indent_stack
252                    .iter()
253                    .rev()
254                    .find(|&&indent| leading >= indent)
255                    .copied()
256            } else {
257                None
258            };
259
260            let is_continuation_list_table = continuation_indent.is_some()
261                && {
262                    let indent = continuation_indent.unwrap();
263                    let leading = line_content.len() - line_content.trim_start().len();
264                    // Per CommonMark, 4+ spaces beyond content indent is a code block
265                    leading < indent + 4
266                }
267                && Self::is_potential_table_row(effective_content);
268
269            let is_any_list_table = is_same_line_list_table || is_continuation_list_table;
270
271            // For continuation list tables, use the matched list indent
272            let effective_content_indent = if is_same_line_list_table {
273                content_indent
274            } else if is_continuation_list_table {
275                continuation_indent.unwrap()
276            } else {
277                0
278            };
279
280            // Look for potential table start
281            if is_any_list_table || Self::is_potential_table_row(effective_content) {
282                // For list tables (same-line or continuation), check indented continuation lines
283                // For regular tables, check the next line directly
284                let (next_line_content, delimiter_has_valid_indent) = if i + 1 < lines.len() {
285                    let next_raw = Self::strip_blockquote_prefix(lines[i + 1]);
286                    if is_any_list_table {
287                        // Verify the delimiter line has proper indentation
288                        let leading_spaces = next_raw.len() - next_raw.trim_start().len();
289                        if leading_spaces >= effective_content_indent {
290                            // Has proper indentation, strip it and check as delimiter
291                            (
292                                Self::strip_list_continuation_indent(next_raw, effective_content_indent),
293                                true,
294                            )
295                        } else {
296                            // Not enough indentation - not a list table
297                            (next_raw, false)
298                        }
299                    } else {
300                        (next_raw, true)
301                    }
302                } else {
303                    ("", true)
304                };
305
306                // For list tables, only accept if delimiter has valid indentation
307                let effective_is_list_table = is_any_list_table && delimiter_has_valid_indent;
308
309                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
310                    // Found a table! Find its end
311                    let table_start = i;
312                    let header_line = i;
313                    let delimiter_line = i + 1;
314                    let mut table_end = i + 1; // Include the delimiter row
315                    let mut content_lines = Vec::new();
316
317                    // Continue while we have table rows
318                    let mut j = i + 2;
319                    while j < lines.len() {
320                        let line = lines[j];
321                        // Strip blockquote prefix for checking
322                        let raw_content = Self::strip_blockquote_prefix(line);
323
324                        // For list tables, strip expected indentation
325                        let line_content = if effective_is_list_table {
326                            Self::strip_list_continuation_indent(raw_content, effective_content_indent)
327                        } else {
328                            raw_content
329                        };
330
331                        if line_content.trim().is_empty() {
332                            // Empty line ends the table
333                            break;
334                        }
335
336                        // For list tables, the continuation line must have proper indentation
337                        if effective_is_list_table {
338                            let leading_spaces = raw_content.len() - raw_content.trim_start().len();
339                            if leading_spaces < effective_content_indent {
340                                // Not enough indentation - end of table
341                                break;
342                            }
343                        }
344
345                        if Self::is_potential_table_row(line_content) {
346                            content_lines.push(j);
347                            table_end = j;
348                            j += 1;
349                        } else {
350                            // Non-table line ends the table
351                            break;
352                        }
353                    }
354
355                    let list_context = if effective_is_list_table {
356                        if is_same_line_list_table {
357                            // Same-line: prefix is the actual list marker (e.g., "- ")
358                            Some(ListTableContext {
359                                list_prefix: list_prefix.to_string(),
360                                content_indent: effective_content_indent,
361                            })
362                        } else {
363                            // Continuation: prefix is the indentation spaces
364                            Some(ListTableContext {
365                                list_prefix: " ".repeat(effective_content_indent),
366                                content_indent: effective_content_indent,
367                            })
368                        }
369                    } else {
370                        None
371                    };
372
373                    tables.push(TableBlock {
374                        start_line: table_start,
375                        end_line: table_end,
376                        header_line,
377                        delimiter_line,
378                        content_lines,
379                        list_context,
380                    });
381                    i = table_end + 1;
382                } else {
383                    i += 1;
384                }
385            } else {
386                i += 1;
387            }
388        }
389
390        tables
391    }
392
393    /// Strip list continuation indentation from a line.
394    /// For lines that are continuations of a list item's content, strip the expected indent.
395    fn strip_list_continuation_indent(line: &str, expected_indent: usize) -> &str {
396        let bytes = line.as_bytes();
397        let mut spaces = 0;
398
399        for &b in bytes {
400            if b == b' ' {
401                spaces += 1;
402            } else if b == b'\t' {
403                // Tab counts as up to 4 spaces, rounding up to next multiple of 4
404                spaces = (spaces / 4 + 1) * 4;
405            } else {
406                break;
407            }
408
409            if spaces >= expected_indent {
410                break;
411            }
412        }
413
414        // Strip at most expected_indent characters
415        let strip_count = spaces.min(expected_indent).min(line.len());
416        // Count actual bytes to strip (handling tabs)
417        let mut byte_count = 0;
418        let mut counted_spaces = 0;
419        for &b in bytes {
420            if counted_spaces >= strip_count {
421                break;
422            }
423            if b == b' ' {
424                counted_spaces += 1;
425                byte_count += 1;
426            } else if b == b'\t' {
427                counted_spaces = (counted_spaces / 4 + 1) * 4;
428                byte_count += 1;
429            } else {
430                break;
431            }
432        }
433
434        &line[byte_count..]
435    }
436
437    /// Find all table blocks in the content with optimized detection
438    /// This is a backward-compatible wrapper that accepts LintContext
439    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
440        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
441    }
442
443    /// Count the number of cells in a table row
444    pub fn count_cells(row: &str) -> usize {
445        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
446    }
447
448    /// Count the number of cells in a table row with flavor-specific behavior
449    ///
450    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
451    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
452    ///
453    /// This function strips blockquote prefixes before counting cells, so it works
454    /// correctly for tables inside blockquotes.
455    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
456        // Strip blockquote prefix if present before counting cells
457        let (_, content) = Self::extract_blockquote_prefix(row);
458        Self::split_table_row_with_flavor(content, flavor).len()
459    }
460
461    /// Mask pipes inside inline code blocks with a placeholder character
462    pub fn mask_pipes_in_inline_code(text: &str) -> String {
463        let mut result = String::new();
464        let chars: Vec<char> = text.chars().collect();
465        let mut i = 0;
466
467        while i < chars.len() {
468            if chars[i] == '`' {
469                // Count consecutive backticks at start
470                let start = i;
471                let mut backtick_count = 0;
472                while i < chars.len() && chars[i] == '`' {
473                    backtick_count += 1;
474                    i += 1;
475                }
476
477                // Look for matching closing backticks
478                let mut found_closing = false;
479                let mut j = i;
480
481                while j < chars.len() {
482                    if chars[j] == '`' {
483                        // Count potential closing backticks
484                        let close_start = j;
485                        let mut close_count = 0;
486                        while j < chars.len() && chars[j] == '`' {
487                            close_count += 1;
488                            j += 1;
489                        }
490
491                        if close_count == backtick_count {
492                            // Found matching closing backticks
493                            found_closing = true;
494
495                            // Valid inline code - add with pipes masked
496                            result.extend(chars[start..i].iter());
497
498                            for &ch in chars.iter().take(close_start).skip(i) {
499                                if ch == '|' {
500                                    result.push('_'); // Mask pipe with underscore
501                                } else {
502                                    result.push(ch);
503                                }
504                            }
505
506                            result.extend(chars[close_start..j].iter());
507                            i = j;
508                            break;
509                        }
510                        // If not matching, continue searching (j is already past these backticks)
511                    } else {
512                        j += 1;
513                    }
514                }
515
516                if !found_closing {
517                    // No matching closing found, treat as regular text
518                    result.extend(chars[start..i].iter());
519                }
520            } else {
521                result.push(chars[i]);
522                i += 1;
523            }
524        }
525
526        result
527    }
528
529    /// Escape pipes inside inline code blocks with backslash.
530    /// Converts `|` to `\|` inside backtick spans.
531    /// Used by auto-fix to preserve content while making tables valid.
532    pub fn escape_pipes_in_inline_code(text: &str) -> String {
533        let mut result = String::new();
534        let chars: Vec<char> = text.chars().collect();
535        let mut i = 0;
536
537        while i < chars.len() {
538            if chars[i] == '`' {
539                let start = i;
540                let mut backtick_count = 0;
541                while i < chars.len() && chars[i] == '`' {
542                    backtick_count += 1;
543                    i += 1;
544                }
545
546                let mut found_closing = false;
547                let mut j = i;
548
549                while j < chars.len() {
550                    if chars[j] == '`' {
551                        let close_start = j;
552                        let mut close_count = 0;
553                        while j < chars.len() && chars[j] == '`' {
554                            close_count += 1;
555                            j += 1;
556                        }
557
558                        if close_count == backtick_count {
559                            found_closing = true;
560                            result.extend(chars[start..i].iter());
561
562                            for &ch in chars.iter().take(close_start).skip(i) {
563                                if ch == '|' {
564                                    result.push('\\');
565                                    result.push('|');
566                                } else {
567                                    result.push(ch);
568                                }
569                            }
570
571                            result.extend(chars[close_start..j].iter());
572                            i = j;
573                            break;
574                        }
575                    } else {
576                        j += 1;
577                    }
578                }
579
580                if !found_closing {
581                    result.extend(chars[start..i].iter());
582                }
583            } else {
584                result.push(chars[i]);
585                i += 1;
586            }
587        }
588
589        result
590    }
591
592    /// Mask escaped pipes for accurate table cell parsing
593    ///
594    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
595    /// - `\|` → escaped pipe → masked (stays as cell content)
596    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
597    ///
598    /// IMPORTANT: Inline code spans do NOT protect pipes in GFM tables!
599    /// The pipe in `` `a | b` `` still acts as a cell delimiter, splitting into
600    /// two cells: `` `a `` and ` b` ``. This matches GitHub's actual rendering.
601    ///
602    /// To include a literal pipe in a table cell (even in code), you must escape it:
603    /// `` `a \| b` `` → single cell containing `a | b` (with code formatting)
604    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
605        let mut result = String::new();
606        let chars: Vec<char> = text.chars().collect();
607        let mut i = 0;
608
609        while i < chars.len() {
610            if chars[i] == '\\' {
611                if i + 1 < chars.len() && chars[i + 1] == '\\' {
612                    // Escaped backslash: \\ → push both and continue
613                    // The next character (if it's a pipe) will be a real delimiter
614                    result.push('\\');
615                    result.push('\\');
616                    i += 2;
617                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
618                    // Escaped pipe: \| → mask the pipe
619                    result.push('\\');
620                    result.push('_'); // Mask the pipe
621                    i += 2;
622                } else {
623                    // Single backslash not followed by \ or | → just push it
624                    result.push(chars[i]);
625                    i += 1;
626                }
627            } else {
628                result.push(chars[i]);
629                i += 1;
630            }
631        }
632
633        result
634    }
635
636    /// Split a table row into individual cell contents with flavor-specific behavior.
637    ///
638    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
639    /// This is the foundation for both cell counting and cell content extraction.
640    ///
641    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
642    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
643    pub fn split_table_row_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> Vec<String> {
644        let trimmed = row.trim();
645
646        if !trimmed.contains('|') {
647            return Vec::new();
648        }
649
650        // First, mask escaped pipes (same for all flavors)
651        let masked = Self::mask_pipes_for_table_parsing(trimmed);
652
653        // For MkDocs flavor, also mask pipes inside inline code
654        let final_masked = if flavor == crate::config::MarkdownFlavor::MkDocs {
655            Self::mask_pipes_in_inline_code(&masked)
656        } else {
657            masked
658        };
659
660        let has_leading = final_masked.starts_with('|');
661        let has_trailing = final_masked.ends_with('|');
662
663        let mut masked_content = final_masked.as_str();
664        let mut orig_content = trimmed;
665
666        if has_leading {
667            masked_content = &masked_content[1..];
668            orig_content = &orig_content[1..];
669        }
670
671        // Track whether we actually strip a trailing pipe
672        let stripped_trailing = has_trailing && !masked_content.is_empty();
673        if stripped_trailing {
674            masked_content = &masked_content[..masked_content.len() - 1];
675            orig_content = &orig_content[..orig_content.len() - 1];
676        }
677
678        // Handle edge cases for degenerate inputs
679        if masked_content.is_empty() {
680            if stripped_trailing {
681                // "||" case: two pipes with empty content between = one empty cell
682                return vec![String::new()];
683            } else {
684                // "|" case: single pipe, not a valid table row
685                return Vec::new();
686            }
687        }
688
689        let masked_parts: Vec<&str> = masked_content.split('|').collect();
690        let mut cells = Vec::new();
691        let mut pos = 0;
692
693        for masked_cell in masked_parts {
694            let cell_len = masked_cell.len();
695            let orig_cell = if pos + cell_len <= orig_content.len() {
696                &orig_content[pos..pos + cell_len]
697            } else {
698                masked_cell
699            };
700            cells.push(orig_cell.to_string());
701            pos += cell_len + 1; // +1 for the pipe delimiter
702        }
703
704        cells
705    }
706
707    /// Split a table row into individual cell contents using Standard/GFM behavior.
708    pub fn split_table_row(row: &str) -> Vec<String> {
709        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
710    }
711
712    /// Determine the pipe style of a table row
713    ///
714    /// Handles tables inside blockquotes by stripping the blockquote prefix
715    /// before analyzing the pipe style.
716    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
717        // Strip blockquote prefix if present before analyzing pipe style
718        let content = Self::strip_blockquote_prefix(line);
719        let trimmed = content.trim();
720        if !trimmed.contains('|') {
721            return None;
722        }
723
724        let has_leading = trimmed.starts_with('|');
725        let has_trailing = trimmed.ends_with('|');
726
727        match (has_leading, has_trailing) {
728            (true, true) => Some("leading_and_trailing"),
729            (true, false) => Some("leading_only"),
730            (false, true) => Some("trailing_only"),
731            (false, false) => Some("no_leading_or_trailing"),
732        }
733    }
734
735    /// Extract blockquote prefix from a line, returning (prefix, content).
736    ///
737    /// This is useful for stripping the prefix before processing, then restoring it after.
738    /// For example: `"> | H1 | H2 |"` returns `("> ", "| H1 | H2 |")`.
739    pub fn extract_blockquote_prefix(line: &str) -> (&str, &str) {
740        // Find where the actual content starts (after blockquote markers and spaces)
741        let bytes = line.as_bytes();
742        let mut pos = 0;
743
744        // Skip leading whitespace (indent before blockquote marker)
745        while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
746            pos += 1;
747        }
748
749        // If no blockquote marker, return empty prefix
750        if pos >= bytes.len() || bytes[pos] != b'>' {
751            return ("", line);
752        }
753
754        // Skip all blockquote markers and spaces
755        while pos < bytes.len() {
756            if bytes[pos] == b'>' {
757                pos += 1;
758                // Skip optional space after >
759                if pos < bytes.len() && bytes[pos] == b' ' {
760                    pos += 1;
761                }
762            } else if bytes[pos] == b' ' || bytes[pos] == b'\t' {
763                pos += 1;
764            } else {
765                break;
766            }
767        }
768
769        // Split at the position where content starts
770        (&line[..pos], &line[pos..])
771    }
772
773    /// Extract list marker prefix from a line, returning (prefix, content, content_indent).
774    ///
775    /// This handles unordered list markers (`-`, `*`, `+`) and ordered list markers (`1.`, `10)`, etc.)
776    /// Returns:
777    /// - prefix: The list marker including any leading whitespace and trailing space (e.g., "- ", "  1. ")
778    /// - content: The content after the list marker
779    /// - content_indent: The number of spaces needed for continuation lines to align with content
780    ///
781    /// For example:
782    /// - `"- | H1 | H2 |"` returns `("- ", "| H1 | H2 |", 2)`
783    /// - `"1. | H1 | H2 |"` returns `("1. ", "| H1 | H2 |", 3)`
784    /// - `"  - table"` returns `("  - ", "table", 4)`
785    ///
786    /// Returns `("", line, 0)` if the line doesn't start with a list marker.
787    pub fn extract_list_prefix(line: &str) -> (&str, &str, usize) {
788        let bytes = line.as_bytes();
789
790        // Skip leading whitespace
791        let leading_spaces = bytes.iter().take_while(|&&b| b == b' ' || b == b'\t').count();
792        let mut pos = leading_spaces;
793
794        if pos >= bytes.len() {
795            return ("", line, 0);
796        }
797
798        // Check for unordered list marker: -, *, +
799        if matches!(bytes[pos], b'-' | b'*' | b'+') {
800            pos += 1;
801
802            // Must be followed by space or tab (or end of line for marker-only lines)
803            if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
804                // Skip the space after marker if present
805                if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
806                    pos += 1;
807                }
808                let content_indent = pos;
809                return (&line[..pos], &line[pos..], content_indent);
810            }
811            // Not a list marker (e.g., "-word" or "--")
812            return ("", line, 0);
813        }
814
815        // Check for ordered list marker: digits followed by . or ) then space
816        if bytes[pos].is_ascii_digit() {
817            let digit_start = pos;
818            while pos < bytes.len() && bytes[pos].is_ascii_digit() {
819                pos += 1;
820            }
821
822            // Must have at least one digit
823            if pos > digit_start && pos < bytes.len() {
824                // Check for . or ) followed by space/tab
825                if bytes[pos] == b'.' || bytes[pos] == b')' {
826                    pos += 1;
827                    if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
828                        // Skip the space after marker if present
829                        if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
830                            pos += 1;
831                        }
832                        let content_indent = pos;
833                        return (&line[..pos], &line[pos..], content_indent);
834                    }
835                }
836            }
837        }
838
839        ("", line, 0)
840    }
841
842    /// Extract the table row content from a line, stripping any list/blockquote prefix.
843    ///
844    /// This is useful for processing table rows that may be inside list items or blockquotes.
845    /// The line_index indicates which line of the table this is (0 = header, 1 = delimiter, etc.)
846    pub fn extract_table_row_content<'a>(line: &'a str, table_block: &TableBlock, line_index: usize) -> &'a str {
847        // First strip blockquote prefix
848        let (_, after_blockquote) = Self::extract_blockquote_prefix(line);
849
850        // Then handle list prefix if present
851        if let Some(ref list_ctx) = table_block.list_context {
852            if line_index == 0 {
853                // Header line: strip list prefix (handles both markers and indentation)
854                after_blockquote
855                    .strip_prefix(&list_ctx.list_prefix)
856                    .unwrap_or_else(|| Self::extract_list_prefix(after_blockquote).1)
857            } else {
858                // Continuation lines: strip indentation
859                Self::strip_list_continuation_indent(after_blockquote, list_ctx.content_indent)
860            }
861        } else {
862            after_blockquote
863        }
864    }
865
866    /// Check if the content after a list marker looks like a table row.
867    /// This is used to detect tables that start on the same line as a list marker.
868    pub fn is_list_item_with_table_row(line: &str) -> bool {
869        let (prefix, content, _) = Self::extract_list_prefix(line);
870        if prefix.is_empty() {
871            return false;
872        }
873
874        // Check if the content after the list marker is a table row
875        // It must start with | (proper table format within a list)
876        let trimmed = content.trim();
877        if !trimmed.starts_with('|') {
878            return false;
879        }
880
881        // Use our table row detection on the content
882        Self::is_potential_table_row_content(content)
883    }
884
885    /// Internal helper: Check if content (without list/blockquote prefix) looks like a table row.
886    fn is_potential_table_row_content(content: &str) -> bool {
887        let trimmed = content.trim();
888        if trimmed.is_empty() || !trimmed.contains('|') {
889            return false;
890        }
891
892        // Skip lines that are clearly code or inline code
893        if trimmed.starts_with('`') || trimmed.contains("``") {
894            return false;
895        }
896
897        // Must have at least 2 parts when split by |
898        let parts: Vec<&str> = trimmed.split('|').collect();
899        if parts.len() < 2 {
900            return false;
901        }
902
903        // Check if it looks like a table row by having reasonable content between pipes
904        let mut valid_parts = 0;
905        let mut total_non_empty_parts = 0;
906
907        for part in &parts {
908            let part_trimmed = part.trim();
909            if part_trimmed.is_empty() {
910                continue;
911            }
912            total_non_empty_parts += 1;
913
914            if !part_trimmed.contains('\n') {
915                valid_parts += 1;
916            }
917        }
918
919        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
920            return false;
921        }
922
923        if total_non_empty_parts == 0 {
924            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
925        }
926
927        if trimmed.starts_with('|') && trimmed.ends_with('|') {
928            valid_parts >= 1
929        } else {
930            valid_parts >= 2
931        }
932    }
933}
934
935#[cfg(test)]
936mod tests {
937    use super::*;
938    use crate::lint_context::LintContext;
939
940    #[test]
941    fn test_is_potential_table_row() {
942        // Basic valid table rows
943        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
944        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
945        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
946        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
947
948        // Multiple cells
949        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
950
951        // With whitespace
952        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
953        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
954
955        // Not table rows
956        assert!(!TableUtils::is_potential_table_row("- List item"));
957        assert!(!TableUtils::is_potential_table_row("* Another list"));
958        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
959        assert!(!TableUtils::is_potential_table_row("Regular text"));
960        assert!(!TableUtils::is_potential_table_row(""));
961        assert!(!TableUtils::is_potential_table_row("   "));
962
963        // Code blocks
964        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
965        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
966
967        // Single pipe not enough
968        assert!(!TableUtils::is_potential_table_row("Just one |"));
969        assert!(!TableUtils::is_potential_table_row("| Just one"));
970
971        // Very long cells are valid in tables (no length limit for cell content)
972        let long_cell = "a".repeat(150);
973        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
974
975        // Cells with newlines
976        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
977
978        // Empty cells (Issue #129)
979        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
980        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
981        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
982    }
983
984    #[test]
985    fn test_list_items_with_pipes_not_table_rows() {
986        // Ordered list items should NOT be detected as table rows
987        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
988        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
989        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
990        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
991        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
992
993        // Unordered list items with tabs
994        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
995        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
996        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
997
998        // Indented list items (the trim_start normalizes indentation)
999        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
1000        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
1001        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
1002
1003        // Task list items
1004        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
1005        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
1006
1007        // Multiple pipes in list items
1008        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
1009        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
1010
1011        // These SHOULD still be detected as potential table rows
1012        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
1013        assert!(TableUtils::is_potential_table_row("cell | cell"));
1014        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
1015    }
1016
1017    #[test]
1018    fn test_atx_headings_with_pipes_not_table_rows() {
1019        // All 6 ATX heading levels with pipes
1020        assert!(!TableUtils::is_potential_table_row("# Heading | with pipe"));
1021        assert!(!TableUtils::is_potential_table_row("## Heading | with pipe"));
1022        assert!(!TableUtils::is_potential_table_row("### Heading | with pipe"));
1023        assert!(!TableUtils::is_potential_table_row("#### Heading | with pipe"));
1024        assert!(!TableUtils::is_potential_table_row("##### Heading | with pipe"));
1025        assert!(!TableUtils::is_potential_table_row("###### Heading | with pipe"));
1026
1027        // Multiple pipes in headings
1028        assert!(!TableUtils::is_potential_table_row("### col1 | col2 | col3"));
1029        assert!(!TableUtils::is_potential_table_row("## a|b|c"));
1030
1031        // Headings with tab after hashes
1032        assert!(!TableUtils::is_potential_table_row("#\tHeading | pipe"));
1033        assert!(!TableUtils::is_potential_table_row("##\tHeading | pipe"));
1034
1035        // Heading with only hashes and pipe (empty heading text)
1036        assert!(!TableUtils::is_potential_table_row("# |"));
1037        assert!(!TableUtils::is_potential_table_row("## |"));
1038
1039        // Indented headings (spaces before #)
1040        assert!(!TableUtils::is_potential_table_row("  ## Heading | pipe"));
1041        assert!(!TableUtils::is_potential_table_row("   ### Heading | pipe"));
1042
1043        // Unicode content in headings (the original proptest failure case)
1044        assert!(!TableUtils::is_potential_table_row("#### ®aAA|ᯗ"));
1045
1046        // 7+ hashes are NOT headings — should follow normal table detection
1047        // "####### text|pipe" has no space after 7 hashes if treated as non-heading
1048        // but with a space it still has 7+ hashes so not a heading
1049        assert!(TableUtils::is_potential_table_row("####### text | pipe"));
1050
1051        // Hash without space is NOT a heading, so pipe detection applies
1052        assert!(TableUtils::is_potential_table_row("#nospc|pipe"));
1053
1054        // These SHOULD still be detected as potential table rows
1055        assert!(TableUtils::is_potential_table_row("| # Header | Value |"));
1056        assert!(TableUtils::is_potential_table_row("text | #tag"));
1057    }
1058
1059    #[test]
1060    fn test_is_delimiter_row() {
1061        // Basic delimiter rows
1062        assert!(TableUtils::is_delimiter_row("|---|---|"));
1063        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
1064        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
1065        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
1066
1067        // With varying dash counts
1068        assert!(TableUtils::is_delimiter_row("|-|--|"));
1069        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
1070
1071        // With whitespace
1072        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
1073        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
1074
1075        // Multiple columns
1076        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
1077
1078        // Without leading/trailing pipes
1079        assert!(TableUtils::is_delimiter_row("--- | ---"));
1080        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
1081
1082        // Not delimiter rows
1083        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
1084        assert!(!TableUtils::is_delimiter_row("Regular text"));
1085        assert!(!TableUtils::is_delimiter_row(""));
1086        assert!(!TableUtils::is_delimiter_row("|||"));
1087        assert!(!TableUtils::is_delimiter_row("| | |"));
1088
1089        // Must have dashes
1090        assert!(!TableUtils::is_delimiter_row("| : | : |"));
1091        assert!(!TableUtils::is_delimiter_row("|    |    |"));
1092
1093        // Mixed content
1094        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
1095        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
1096    }
1097
1098    #[test]
1099    fn test_count_cells() {
1100        // Basic counts
1101        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
1102        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
1103        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
1104        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
1105
1106        // Single cell
1107        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
1108        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
1109
1110        // Empty cells
1111        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
1112        assert_eq!(TableUtils::count_cells("| | | |"), 3);
1113
1114        // Many cells
1115        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
1116
1117        // Edge cases
1118        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
1119        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
1120
1121        // No table
1122        assert_eq!(TableUtils::count_cells("Regular text"), 0);
1123        assert_eq!(TableUtils::count_cells(""), 0);
1124        assert_eq!(TableUtils::count_cells("   "), 0);
1125
1126        // Whitespace handling
1127        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
1128        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
1129    }
1130
1131    #[test]
1132    fn test_count_cells_with_escaped_pipes() {
1133        // In GFM tables, escape handling happens BEFORE cell splitting.
1134        // Inline code does NOT protect pipes - they still act as cell delimiters.
1135        // To include a literal pipe in a table cell, you MUST escape it with \|
1136
1137        // Basic table structure
1138        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
1139        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
1140        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
1141
1142        // Escaped pipes: \| keeps the pipe as content
1143        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
1144        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
1145
1146        // Escaped pipes inside backticks (correct way to include | in code in tables)
1147        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
1148
1149        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
1150        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
1151        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 3); // Same inside code
1152
1153        // IMPORTANT: Bare pipes in inline code DO act as delimiters (GFM behavior)
1154        // This matches GitHub's actual rendering where `a | b` splits into two cells
1155        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 3);
1156        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 4);
1157        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 2);
1158
1159        // The regex example from Issue #34 - pipes in regex patterns need escaping
1160        // Unescaped: `^([0-1]?\d|2[0-3])` has a bare | which splits cells
1161        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 3);
1162        // Escaped: `^([0-1]?\d\|2[0-3])` keeps the | as part of the regex
1163        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
1164    }
1165
1166    #[test]
1167    fn test_determine_pipe_style() {
1168        // All pipe styles
1169        assert_eq!(
1170            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
1171            Some("leading_and_trailing")
1172        );
1173        assert_eq!(
1174            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
1175            Some("leading_only")
1176        );
1177        assert_eq!(
1178            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
1179            Some("trailing_only")
1180        );
1181        assert_eq!(
1182            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
1183            Some("no_leading_or_trailing")
1184        );
1185
1186        // With whitespace
1187        assert_eq!(
1188            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
1189            Some("leading_and_trailing")
1190        );
1191        assert_eq!(
1192            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
1193            Some("leading_only")
1194        );
1195
1196        // No pipes
1197        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
1198        assert_eq!(TableUtils::determine_pipe_style(""), None);
1199        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1200
1201        // Single pipe cases
1202        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
1203        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
1204        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
1205    }
1206
    /// A document that is exactly one four-line table: the block is expected to span
    /// lines 0..=3 (zero-based), with the header on line 0, the delimiter on line 1,
    /// and the two body rows reported as content lines 2 and 3.
    #[test]
    fn test_find_table_blocks_simple() {
        let content = "| Header 1 | Header 2 |
|-----------|-----------|
| Cell 1    | Cell 2    |
| Cell 3    | Cell 4    |";

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let tables = TableUtils::find_table_blocks(content, &ctx);
        assert_eq!(tables.len(), 1);

        // All line numbers below are zero-based indices into `content`
        let table = &tables[0];
        assert_eq!(table.start_line, 0);
        assert_eq!(table.end_line, 3);
        assert_eq!(table.header_line, 0);
        assert_eq!(table.delimiter_line, 1);
        assert_eq!(table.content_lines, vec![2, 3]);
    }
1226
    /// Two tables separated by paragraphs and blank lines must be reported as two
    /// independent blocks, each with its own zero-based header, delimiter and
    /// content line indices.
    #[test]
    fn test_find_table_blocks_multiple() {
        let content = "Some text

| Table 1 | Col A |
|----------|-------|
| Data 1   | Val 1 |

More text

| Table 2 | Col 2 |
|----------|-------|
| Data 2   | Data  |";

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let tables = TableUtils::find_table_blocks(content, &ctx);
        assert_eq!(tables.len(), 2);

        // First table: lines 2..=4 of the fixture
        assert_eq!(tables[0].start_line, 2);
        assert_eq!(tables[0].end_line, 4);
        assert_eq!(tables[0].header_line, 2);
        assert_eq!(tables[0].delimiter_line, 3);
        assert_eq!(tables[0].content_lines, vec![4]);

        // Second table: lines 8..=10 of the fixture
        assert_eq!(tables[1].start_line, 8);
        assert_eq!(tables[1].end_line, 10);
        assert_eq!(tables[1].header_line, 8);
        assert_eq!(tables[1].delimiter_line, 9);
        assert_eq!(tables[1].content_lines, vec![10]);
    }
1260
    /// A header plus delimiter with no body rows still counts as a table: the block
    /// ends on the delimiter line and has an empty list of content lines.
    #[test]
    fn test_find_table_blocks_no_content_rows() {
        let content = "| Header 1 | Header 2 |
|-----------|-----------|

Next paragraph";

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let tables = TableUtils::find_table_blocks(content, &ctx);
        assert_eq!(tables.len(), 1);

        let table = &tables[0];
        assert_eq!(table.start_line, 0);
        assert_eq!(table.end_line, 1); // Just header and delimiter
        assert_eq!(table.content_lines.len(), 0);
    }
1278
    /// Table-shaped lines inside a fenced code block must be ignored; only the
    /// table that follows the closing fence is reported (header on zero-based
    /// line 6, delimiter on line 7).
    #[test]
    fn test_find_table_blocks_in_code_block() {
        let content = "```
| Not | A | Table |
|-----|---|-------|
| In  | Code | Block |
```

| Real | Table |
|------|-------|
| Data | Here  |";

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let tables = TableUtils::find_table_blocks(content, &ctx);
        assert_eq!(tables.len(), 1); // Only the table outside code block

        let table = &tables[0];
        assert_eq!(table.header_line, 6);
        assert_eq!(table.delimiter_line, 7);
    }
1300
    /// Plain paragraphs and list items, including a list item that contains a
    /// pipe, must not produce any table block.
    #[test]
    fn test_find_table_blocks_no_tables() {
        let content = "Just regular text
No tables here
- List item with | pipe
* Another list item";

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let tables = TableUtils::find_table_blocks(content, &ctx);
        assert_eq!(tables.len(), 0);
    }
1313
    /// Pipe-delimited lines that are never followed by a delimiter row are not a
    /// table; only the later, well-formed table (header on zero-based line 4) is
    /// reported.
    #[test]
    fn test_find_table_blocks_malformed() {
        let content = "| Header without delimiter |
| This looks like table |
But no delimiter row

| Proper | Table |
|---------|-------|
| Data    | Here  |";

        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

        let tables = TableUtils::find_table_blocks(content, &ctx);
        assert_eq!(tables.len(), 1); // Only the proper table
        assert_eq!(tables[0].header_line, 4);
    }
1330
1331    #[test]
1332    fn test_edge_cases() {
1333        // Test empty content
1334        assert!(!TableUtils::is_potential_table_row(""));
1335        assert!(!TableUtils::is_delimiter_row(""));
1336        assert_eq!(TableUtils::count_cells(""), 0);
1337        assert_eq!(TableUtils::determine_pipe_style(""), None);
1338
1339        // Test whitespace only
1340        assert!(!TableUtils::is_potential_table_row("   "));
1341        assert!(!TableUtils::is_delimiter_row("   "));
1342        assert_eq!(TableUtils::count_cells("   "), 0);
1343        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1344
1345        // Test single character
1346        assert!(!TableUtils::is_potential_table_row("|"));
1347        assert!(!TableUtils::is_delimiter_row("|"));
1348        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
1349
1350        // Test very long lines are valid table rows (no length limit)
1351        // Test both single-column and multi-column long lines
1352        let long_single = format!("| {} |", "a".repeat(200));
1353        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
1354
1355        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
1356        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
1357
1358        // Test unicode
1359        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
1360        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
1361        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
1362    }
1363
1364    #[test]
1365    fn test_table_block_struct() {
1366        let block = TableBlock {
1367            start_line: 0,
1368            end_line: 5,
1369            header_line: 0,
1370            delimiter_line: 1,
1371            content_lines: vec![2, 3, 4, 5],
1372            list_context: None,
1373        };
1374
1375        // Test Debug trait
1376        let debug_str = format!("{block:?}");
1377        assert!(debug_str.contains("TableBlock"));
1378        assert!(debug_str.contains("start_line: 0"));
1379
1380        // Test Clone trait
1381        let cloned = block.clone();
1382        assert_eq!(cloned.start_line, block.start_line);
1383        assert_eq!(cloned.end_line, block.end_line);
1384        assert_eq!(cloned.header_line, block.header_line);
1385        assert_eq!(cloned.delimiter_line, block.delimiter_line);
1386        assert_eq!(cloned.content_lines, block.content_lines);
1387        assert!(cloned.list_context.is_none());
1388    }
1389
1390    #[test]
1391    fn test_split_table_row() {
1392        // Basic split
1393        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
1394        assert_eq!(cells.len(), 3);
1395        assert_eq!(cells[0].trim(), "Cell 1");
1396        assert_eq!(cells[1].trim(), "Cell 2");
1397        assert_eq!(cells[2].trim(), "Cell 3");
1398
1399        // Without trailing pipe
1400        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
1401        assert_eq!(cells.len(), 2);
1402
1403        // Empty cells
1404        let cells = TableUtils::split_table_row("| | | |");
1405        assert_eq!(cells.len(), 3);
1406
1407        // Single cell
1408        let cells = TableUtils::split_table_row("| Cell |");
1409        assert_eq!(cells.len(), 1);
1410        assert_eq!(cells[0].trim(), "Cell");
1411
1412        // No pipes
1413        let cells = TableUtils::split_table_row("No pipes here");
1414        assert_eq!(cells.len(), 0);
1415    }
1416
1417    #[test]
1418    fn test_split_table_row_with_escaped_pipes() {
1419        // Escaped pipes should be preserved in cell content
1420        let cells = TableUtils::split_table_row(r"| A | B \| C |");
1421        assert_eq!(cells.len(), 2);
1422        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
1423
1424        // Double backslash + pipe is NOT escaped
1425        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
1426        assert_eq!(cells.len(), 3);
1427    }
1428
1429    #[test]
1430    fn test_split_table_row_with_flavor_mkdocs() {
1431        // MkDocs flavor: pipes in inline code are NOT cell delimiters
1432        let cells =
1433            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
1434        assert_eq!(cells.len(), 2);
1435        assert!(
1436            cells[1].contains("`x | y`"),
1437            "Inline code with pipe should be single cell in MkDocs flavor"
1438        );
1439
1440        // Multiple pipes in inline code
1441        let cells =
1442            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
1443        assert_eq!(cells.len(), 2);
1444        assert!(cells[1].contains("`a | b | c`"));
1445    }
1446
1447    #[test]
1448    fn test_split_table_row_with_flavor_standard() {
1449        // Standard/GFM flavor: pipes in inline code ARE cell delimiters
1450        let cells =
1451            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
1452        // In GFM, `x | y` splits into separate cells
1453        assert_eq!(cells.len(), 3);
1454    }
1455
1456    // === extract_blockquote_prefix tests ===
1457
1458    #[test]
1459    fn test_extract_blockquote_prefix_no_blockquote() {
1460        // Regular table row without blockquote
1461        let (prefix, content) = TableUtils::extract_blockquote_prefix("| H1 | H2 |");
1462        assert_eq!(prefix, "");
1463        assert_eq!(content, "| H1 | H2 |");
1464    }
1465
1466    #[test]
1467    fn test_extract_blockquote_prefix_single_level() {
1468        // Single blockquote level
1469        let (prefix, content) = TableUtils::extract_blockquote_prefix("> | H1 | H2 |");
1470        assert_eq!(prefix, "> ");
1471        assert_eq!(content, "| H1 | H2 |");
1472    }
1473
1474    #[test]
1475    fn test_extract_blockquote_prefix_double_level() {
1476        // Double blockquote level
1477        let (prefix, content) = TableUtils::extract_blockquote_prefix(">> | H1 | H2 |");
1478        assert_eq!(prefix, ">> ");
1479        assert_eq!(content, "| H1 | H2 |");
1480    }
1481
1482    #[test]
1483    fn test_extract_blockquote_prefix_triple_level() {
1484        // Triple blockquote level
1485        let (prefix, content) = TableUtils::extract_blockquote_prefix(">>> | H1 | H2 |");
1486        assert_eq!(prefix, ">>> ");
1487        assert_eq!(content, "| H1 | H2 |");
1488    }
1489
1490    #[test]
1491    fn test_extract_blockquote_prefix_with_spaces() {
1492        // Blockquote with spaces between markers
1493        let (prefix, content) = TableUtils::extract_blockquote_prefix("> > | H1 | H2 |");
1494        assert_eq!(prefix, "> > ");
1495        assert_eq!(content, "| H1 | H2 |");
1496    }
1497
1498    #[test]
1499    fn test_extract_blockquote_prefix_indented() {
1500        // Indented blockquote
1501        let (prefix, content) = TableUtils::extract_blockquote_prefix("  > | H1 | H2 |");
1502        assert_eq!(prefix, "  > ");
1503        assert_eq!(content, "| H1 | H2 |");
1504    }
1505
1506    #[test]
1507    fn test_extract_blockquote_prefix_no_space_after() {
1508        // Blockquote without space after marker
1509        let (prefix, content) = TableUtils::extract_blockquote_prefix(">| H1 | H2 |");
1510        assert_eq!(prefix, ">");
1511        assert_eq!(content, "| H1 | H2 |");
1512    }
1513
1514    #[test]
1515    fn test_determine_pipe_style_in_blockquote() {
1516        // determine_pipe_style should handle blockquotes correctly
1517        assert_eq!(
1518            TableUtils::determine_pipe_style("> | H1 | H2 |"),
1519            Some("leading_and_trailing")
1520        );
1521        assert_eq!(
1522            TableUtils::determine_pipe_style("> H1 | H2"),
1523            Some("no_leading_or_trailing")
1524        );
1525        assert_eq!(
1526            TableUtils::determine_pipe_style(">> | H1 | H2 |"),
1527            Some("leading_and_trailing")
1528        );
1529        assert_eq!(TableUtils::determine_pipe_style(">>> | H1 | H2"), Some("leading_only"));
1530    }
1531
1532    #[test]
1533    fn test_list_table_delimiter_requires_indentation() {
1534        // Test case: list item contains pipe, but delimiter line is at column 1
1535        // This should NOT be detected as a list table since the delimiter has no indentation.
1536        // The result is a non-list table starting at line 0 (the list item becomes the header)
1537        // but list_context should be None.
1538        let content = "- List item with | pipe\n|---|---|\n| Cell 1 | Cell 2 |";
1539        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1540        let tables = TableUtils::find_table_blocks(content, &ctx);
1541
1542        // The table will be detected starting at line 0, but crucially it should NOT have
1543        // list_context set, meaning it won't be treated as a list-table for column count purposes
1544        assert_eq!(tables.len(), 1, "Should find exactly one table");
1545        assert!(
1546            tables[0].list_context.is_none(),
1547            "Should NOT have list context since delimiter has no indentation"
1548        );
1549    }
1550
1551    #[test]
1552    fn test_list_table_with_properly_indented_delimiter() {
1553        // Test case: list item with table header, delimiter properly indented
1554        // This SHOULD be detected as a list table
1555        let content = "- | Header 1 | Header 2 |\n  |----------|----------|\n  | Cell 1   | Cell 2   |";
1556        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1557        let tables = TableUtils::find_table_blocks(content, &ctx);
1558
1559        // Should find exactly one list-table starting at line 0
1560        assert_eq!(tables.len(), 1, "Should find exactly one table");
1561        assert_eq!(tables[0].start_line, 0, "Table should start at list item line");
1562        assert!(
1563            tables[0].list_context.is_some(),
1564            "Should be a list table since delimiter is properly indented"
1565        );
1566    }
1567}