Skip to main content

rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).

/// Represents a table block in the document.
///
/// All line fields are zero-based indices into `content.lines()` as produced by
/// `TableUtils::find_table_blocks_with_code_info`.
#[derive(Debug, Clone)]
pub struct TableBlock {
    /// Index of the first line of the table (the header row).
    pub start_line: usize,
    /// Index of the last line that belongs to the table: the last body row,
    /// or the delimiter row when the table has no body rows.
    pub end_line: usize,
    /// Index of the header row (same value as `start_line`).
    pub header_line: usize,
    /// Index of the delimiter row, i.e. the line directly after the header.
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in document order.
    pub content_lines: Vec<usize>,
    /// If the table is inside a list item, this contains:
    /// - The list marker prefix for the header line (e.g., "- ", "1. ")
    /// - The content indent (number of spaces for continuation lines)
    ///
    /// `None` for tables that are not part of a list item.
    pub list_context: Option<ListTableContext>,
}
18
/// Context information for tables inside list items
///
/// Attached to a `TableBlock` through its `list_context` field.
#[derive(Debug, Clone)]
pub struct ListTableContext {
    /// The list marker prefix including any leading whitespace (e.g., "- ", "  1. ")
    ///
    /// When the header row is a continuation line (no marker on that line), this
    /// holds `content_indent` spaces instead of a marker.
    pub list_prefix: String,
    /// Number of spaces for continuation lines to align with content
    pub content_indent: usize,
}
27
/// Shared table detection utilities
///
/// A field-less namespace type: all functionality is exposed as associated
/// functions (for example `TableUtils::is_delimiter_row`).
pub struct TableUtils;
30
31impl TableUtils {
32    /// Check if a line looks like a potential table row
33    pub fn is_potential_table_row(line: &str) -> bool {
34        let trimmed = line.trim();
35        if trimmed.is_empty() || !trimmed.contains('|') {
36            return false;
37        }
38
39        // Skip lines that are clearly not table rows
40        // Unordered list items with space or tab after marker
41        if trimmed.starts_with("- ")
42            || trimmed.starts_with("* ")
43            || trimmed.starts_with("+ ")
44            || trimmed.starts_with("-\t")
45            || trimmed.starts_with("*\t")
46            || trimmed.starts_with("+\t")
47        {
48            return false;
49        }
50
51        // Skip ordered list items: digits followed by . or ) then space/tab
52        if let Some(first_non_digit) = trimmed.find(|c: char| !c.is_ascii_digit())
53            && first_non_digit > 0
54        {
55            let after_digits = &trimmed[first_non_digit..];
56            if after_digits.starts_with(". ")
57                || after_digits.starts_with(".\t")
58                || after_digits.starts_with(") ")
59                || after_digits.starts_with(")\t")
60            {
61                return false;
62            }
63        }
64
65        // Skip lines that are clearly code or inline code
66        if trimmed.starts_with("`") || trimmed.contains("``") {
67            return false;
68        }
69
70        // Must have at least 2 parts when split by |
71        let parts: Vec<&str> = trimmed.split('|').collect();
72        if parts.len() < 2 {
73            return false;
74        }
75
76        // Check if it looks like a table row by having reasonable content between pipes
77        let mut valid_parts = 0;
78        let mut total_non_empty_parts = 0;
79
80        for part in &parts {
81            let part_trimmed = part.trim();
82            // Skip empty parts (from leading/trailing pipes)
83            if part_trimmed.is_empty() {
84                continue;
85            }
86            total_non_empty_parts += 1;
87
88            // Count parts that look like table cells (reasonable content, no newlines)
89            if !part_trimmed.contains('\n') {
90                valid_parts += 1;
91            }
92        }
93
94        // Check if all non-empty parts are valid (no newlines)
95        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
96            // Some cells contain newlines, not a valid table row
97            return false;
98        }
99
100        // GFM allows tables with all empty cells (e.g., |||)
101        // These are valid if they have proper table formatting (leading and trailing pipes)
102        if total_non_empty_parts == 0 {
103            // Empty cells are only valid with proper pipe formatting
104            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
105        }
106
107        // GFM allows single-column tables, so >= 1 valid part is enough
108        // when the line has proper table formatting (pipes)
109        if trimmed.starts_with('|') && trimmed.ends_with('|') {
110            // Properly formatted table row with pipes on both ends
111            valid_parts >= 1
112        } else {
113            // For rows without proper pipe formatting, require at least 2 cells
114            valid_parts >= 2
115        }
116    }
117
118    /// Check if a line is a table delimiter row (e.g., |---|---|)
119    pub fn is_delimiter_row(line: &str) -> bool {
120        let trimmed = line.trim();
121        if !trimmed.contains('|') || !trimmed.contains('-') {
122            return false;
123        }
124
125        // Split by pipes and check each part
126        let parts: Vec<&str> = trimmed.split('|').collect();
127        let mut valid_delimiter_parts = 0;
128        let mut total_non_empty_parts = 0;
129
130        for part in &parts {
131            let part_trimmed = part.trim();
132            if part_trimmed.is_empty() {
133                continue; // Skip empty parts from leading/trailing pipes
134            }
135
136            total_non_empty_parts += 1;
137
138            // Check if this part looks like a delimiter (contains dashes and optionally colons)
139            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
140                valid_delimiter_parts += 1;
141            }
142        }
143
144        // All non-empty parts must be valid delimiters, and there must be at least one
145        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
146    }
147
148    /// Strip blockquote prefix from a line, returning the content without the prefix
149    fn strip_blockquote_prefix(line: &str) -> &str {
150        let trimmed = line.trim_start();
151        if trimmed.starts_with('>') {
152            // Strip all blockquote markers and following space
153            let mut rest = trimmed;
154            while rest.starts_with('>') {
155                rest = rest.strip_prefix('>').unwrap_or(rest);
156                rest = rest.trim_start_matches(' ');
157            }
158            rest
159        } else {
160            line
161        }
162    }
163
164    /// Find all table blocks in the content with optimized detection
165    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
166    pub fn find_table_blocks_with_code_info(
167        content: &str,
168        code_blocks: &[(usize, usize)],
169        code_spans: &[crate::lint_context::CodeSpan],
170        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
171    ) -> Vec<TableBlock> {
172        let lines: Vec<&str> = content.lines().collect();
173        let mut tables = Vec::new();
174        let mut i = 0;
175
176        // Pre-compute line positions for efficient code block checking
177        let mut line_positions = Vec::with_capacity(lines.len());
178        let mut pos = 0;
179        for line in &lines {
180            line_positions.push(pos);
181            pos += line.len() + 1; // +1 for newline
182        }
183
184        // Stack of active list content indents for continuation table tracking.
185        // Supports nested lists: when a child list is seen, we push; when we
186        // dedent past a level, we pop back to the enclosing list.
187        let mut list_indent_stack: Vec<usize> = Vec::new();
188
189        while i < lines.len() {
190            // Skip lines in code blocks, code spans, or HTML comments
191            let line_start = line_positions[i];
192            let in_code =
193                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
194                    || code_spans
195                        .iter()
196                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
197            let in_html_comment = html_comment_ranges
198                .iter()
199                .any(|range| line_start >= range.start && line_start < range.end);
200
201            if in_code || in_html_comment {
202                i += 1;
203                continue;
204            }
205
206            // Strip blockquote prefix for table detection
207            let line_content = Self::strip_blockquote_prefix(lines[i]);
208
209            // Update active list tracking
210            let (list_prefix, list_content, content_indent) = Self::extract_list_prefix(line_content);
211            if !list_prefix.is_empty() {
212                // Line has a list marker. Pop any deeper/equal levels, then push this one.
213                while list_indent_stack.last().is_some_and(|&top| top >= content_indent) {
214                    list_indent_stack.pop();
215                }
216                list_indent_stack.push(content_indent);
217            } else if !line_content.trim().is_empty() {
218                // Non-blank line without a marker: pop any levels we've dedented past
219                let leading = line_content.len() - line_content.trim_start().len();
220                while list_indent_stack.last().is_some_and(|&top| leading < top) {
221                    list_indent_stack.pop();
222                }
223            }
224            // Blank lines keep the stack unchanged (blank lines don't end list items)
225
226            // Check if this is a list item that contains a table row on the same line,
227            // or a continuation table indented under an active list item
228            let (is_same_line_list_table, effective_content) =
229                if !list_prefix.is_empty() && Self::is_potential_table_row_content(list_content) {
230                    (true, list_content)
231                } else {
232                    (false, line_content)
233                };
234
235            // Detect continuation list tables: no marker on this line, but indented
236            // under an active list item (e.g., "- Text\n  | h1 | h2 |")
237            let continuation_indent = if !is_same_line_list_table && list_prefix.is_empty() {
238                let leading = line_content.len() - line_content.trim_start().len();
239                // Find the deepest list level this line is indented under
240                list_indent_stack
241                    .iter()
242                    .rev()
243                    .find(|&&indent| leading >= indent)
244                    .copied()
245            } else {
246                None
247            };
248
249            let is_continuation_list_table = continuation_indent.is_some()
250                && {
251                    let indent = continuation_indent.unwrap();
252                    let leading = line_content.len() - line_content.trim_start().len();
253                    // Per CommonMark, 4+ spaces beyond content indent is a code block
254                    leading < indent + 4
255                }
256                && Self::is_potential_table_row(effective_content);
257
258            let is_any_list_table = is_same_line_list_table || is_continuation_list_table;
259
260            // For continuation list tables, use the matched list indent
261            let effective_content_indent = if is_same_line_list_table {
262                content_indent
263            } else if is_continuation_list_table {
264                continuation_indent.unwrap()
265            } else {
266                0
267            };
268
269            // Look for potential table start
270            if is_any_list_table || Self::is_potential_table_row(effective_content) {
271                // For list tables (same-line or continuation), check indented continuation lines
272                // For regular tables, check the next line directly
273                let (next_line_content, delimiter_has_valid_indent) = if i + 1 < lines.len() {
274                    let next_raw = Self::strip_blockquote_prefix(lines[i + 1]);
275                    if is_any_list_table {
276                        // Verify the delimiter line has proper indentation
277                        let leading_spaces = next_raw.len() - next_raw.trim_start().len();
278                        if leading_spaces >= effective_content_indent {
279                            // Has proper indentation, strip it and check as delimiter
280                            (
281                                Self::strip_list_continuation_indent(next_raw, effective_content_indent),
282                                true,
283                            )
284                        } else {
285                            // Not enough indentation - not a list table
286                            (next_raw, false)
287                        }
288                    } else {
289                        (next_raw, true)
290                    }
291                } else {
292                    ("", true)
293                };
294
295                // For list tables, only accept if delimiter has valid indentation
296                let effective_is_list_table = is_any_list_table && delimiter_has_valid_indent;
297
298                if i + 1 < lines.len() && Self::is_delimiter_row(next_line_content) {
299                    // Found a table! Find its end
300                    let table_start = i;
301                    let header_line = i;
302                    let delimiter_line = i + 1;
303                    let mut table_end = i + 1; // Include the delimiter row
304                    let mut content_lines = Vec::new();
305
306                    // Continue while we have table rows
307                    let mut j = i + 2;
308                    while j < lines.len() {
309                        let line = lines[j];
310                        // Strip blockquote prefix for checking
311                        let raw_content = Self::strip_blockquote_prefix(line);
312
313                        // For list tables, strip expected indentation
314                        let line_content = if effective_is_list_table {
315                            Self::strip_list_continuation_indent(raw_content, effective_content_indent)
316                        } else {
317                            raw_content
318                        };
319
320                        if line_content.trim().is_empty() {
321                            // Empty line ends the table
322                            break;
323                        }
324
325                        // For list tables, the continuation line must have proper indentation
326                        if effective_is_list_table {
327                            let leading_spaces = raw_content.len() - raw_content.trim_start().len();
328                            if leading_spaces < effective_content_indent {
329                                // Not enough indentation - end of table
330                                break;
331                            }
332                        }
333
334                        if Self::is_potential_table_row(line_content) {
335                            content_lines.push(j);
336                            table_end = j;
337                            j += 1;
338                        } else {
339                            // Non-table line ends the table
340                            break;
341                        }
342                    }
343
344                    let list_context = if effective_is_list_table {
345                        if is_same_line_list_table {
346                            // Same-line: prefix is the actual list marker (e.g., "- ")
347                            Some(ListTableContext {
348                                list_prefix: list_prefix.to_string(),
349                                content_indent: effective_content_indent,
350                            })
351                        } else {
352                            // Continuation: prefix is the indentation spaces
353                            Some(ListTableContext {
354                                list_prefix: " ".repeat(effective_content_indent),
355                                content_indent: effective_content_indent,
356                            })
357                        }
358                    } else {
359                        None
360                    };
361
362                    tables.push(TableBlock {
363                        start_line: table_start,
364                        end_line: table_end,
365                        header_line,
366                        delimiter_line,
367                        content_lines,
368                        list_context,
369                    });
370                    i = table_end + 1;
371                } else {
372                    i += 1;
373                }
374            } else {
375                i += 1;
376            }
377        }
378
379        tables
380    }
381
382    /// Strip list continuation indentation from a line.
383    /// For lines that are continuations of a list item's content, strip the expected indent.
384    fn strip_list_continuation_indent(line: &str, expected_indent: usize) -> &str {
385        let bytes = line.as_bytes();
386        let mut spaces = 0;
387
388        for &b in bytes {
389            if b == b' ' {
390                spaces += 1;
391            } else if b == b'\t' {
392                // Tab counts as up to 4 spaces, rounding up to next multiple of 4
393                spaces = (spaces / 4 + 1) * 4;
394            } else {
395                break;
396            }
397
398            if spaces >= expected_indent {
399                break;
400            }
401        }
402
403        // Strip at most expected_indent characters
404        let strip_count = spaces.min(expected_indent).min(line.len());
405        // Count actual bytes to strip (handling tabs)
406        let mut byte_count = 0;
407        let mut counted_spaces = 0;
408        for &b in bytes {
409            if counted_spaces >= strip_count {
410                break;
411            }
412            if b == b' ' {
413                counted_spaces += 1;
414                byte_count += 1;
415            } else if b == b'\t' {
416                counted_spaces = (counted_spaces / 4 + 1) * 4;
417                byte_count += 1;
418            } else {
419                break;
420            }
421        }
422
423        &line[byte_count..]
424    }
425
426    /// Find all table blocks in the content with optimized detection
427    /// This is a backward-compatible wrapper that accepts LintContext
428    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
429        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
430    }
431
432    /// Count the number of cells in a table row
433    pub fn count_cells(row: &str) -> usize {
434        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
435    }
436
437    /// Count the number of cells in a table row with flavor-specific behavior
438    ///
439    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
440    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
441    ///
442    /// This function strips blockquote prefixes before counting cells, so it works
443    /// correctly for tables inside blockquotes.
444    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
445        // Strip blockquote prefix if present before counting cells
446        let (_, content) = Self::extract_blockquote_prefix(row);
447        Self::split_table_row_with_flavor(content, flavor).len()
448    }
449
450    /// Mask pipes inside inline code blocks with a placeholder character
451    pub fn mask_pipes_in_inline_code(text: &str) -> String {
452        let mut result = String::new();
453        let chars: Vec<char> = text.chars().collect();
454        let mut i = 0;
455
456        while i < chars.len() {
457            if chars[i] == '`' {
458                // Count consecutive backticks at start
459                let start = i;
460                let mut backtick_count = 0;
461                while i < chars.len() && chars[i] == '`' {
462                    backtick_count += 1;
463                    i += 1;
464                }
465
466                // Look for matching closing backticks
467                let mut found_closing = false;
468                let mut j = i;
469
470                while j < chars.len() {
471                    if chars[j] == '`' {
472                        // Count potential closing backticks
473                        let close_start = j;
474                        let mut close_count = 0;
475                        while j < chars.len() && chars[j] == '`' {
476                            close_count += 1;
477                            j += 1;
478                        }
479
480                        if close_count == backtick_count {
481                            // Found matching closing backticks
482                            found_closing = true;
483
484                            // Valid inline code - add with pipes masked
485                            result.extend(chars[start..i].iter());
486
487                            for &ch in chars.iter().take(close_start).skip(i) {
488                                if ch == '|' {
489                                    result.push('_'); // Mask pipe with underscore
490                                } else {
491                                    result.push(ch);
492                                }
493                            }
494
495                            result.extend(chars[close_start..j].iter());
496                            i = j;
497                            break;
498                        }
499                        // If not matching, continue searching (j is already past these backticks)
500                    } else {
501                        j += 1;
502                    }
503                }
504
505                if !found_closing {
506                    // No matching closing found, treat as regular text
507                    result.extend(chars[start..i].iter());
508                }
509            } else {
510                result.push(chars[i]);
511                i += 1;
512            }
513        }
514
515        result
516    }
517
518    /// Escape pipes inside inline code blocks with backslash.
519    /// Converts `|` to `\|` inside backtick spans.
520    /// Used by auto-fix to preserve content while making tables valid.
521    pub fn escape_pipes_in_inline_code(text: &str) -> String {
522        let mut result = String::new();
523        let chars: Vec<char> = text.chars().collect();
524        let mut i = 0;
525
526        while i < chars.len() {
527            if chars[i] == '`' {
528                let start = i;
529                let mut backtick_count = 0;
530                while i < chars.len() && chars[i] == '`' {
531                    backtick_count += 1;
532                    i += 1;
533                }
534
535                let mut found_closing = false;
536                let mut j = i;
537
538                while j < chars.len() {
539                    if chars[j] == '`' {
540                        let close_start = j;
541                        let mut close_count = 0;
542                        while j < chars.len() && chars[j] == '`' {
543                            close_count += 1;
544                            j += 1;
545                        }
546
547                        if close_count == backtick_count {
548                            found_closing = true;
549                            result.extend(chars[start..i].iter());
550
551                            for &ch in chars.iter().take(close_start).skip(i) {
552                                if ch == '|' {
553                                    result.push('\\');
554                                    result.push('|');
555                                } else {
556                                    result.push(ch);
557                                }
558                            }
559
560                            result.extend(chars[close_start..j].iter());
561                            i = j;
562                            break;
563                        }
564                    } else {
565                        j += 1;
566                    }
567                }
568
569                if !found_closing {
570                    result.extend(chars[start..i].iter());
571                }
572            } else {
573                result.push(chars[i]);
574                i += 1;
575            }
576        }
577
578        result
579    }
580
581    /// Mask escaped pipes for accurate table cell parsing
582    ///
583    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
584    /// - `\|` → escaped pipe → masked (stays as cell content)
585    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
586    ///
587    /// IMPORTANT: Inline code spans do NOT protect pipes in GFM tables!
588    /// The pipe in `` `a | b` `` still acts as a cell delimiter, splitting into
589    /// two cells: `` `a `` and ` b` ``. This matches GitHub's actual rendering.
590    ///
591    /// To include a literal pipe in a table cell (even in code), you must escape it:
592    /// `` `a \| b` `` → single cell containing `a | b` (with code formatting)
593    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
594        let mut result = String::new();
595        let chars: Vec<char> = text.chars().collect();
596        let mut i = 0;
597
598        while i < chars.len() {
599            if chars[i] == '\\' {
600                if i + 1 < chars.len() && chars[i + 1] == '\\' {
601                    // Escaped backslash: \\ → push both and continue
602                    // The next character (if it's a pipe) will be a real delimiter
603                    result.push('\\');
604                    result.push('\\');
605                    i += 2;
606                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
607                    // Escaped pipe: \| → mask the pipe
608                    result.push('\\');
609                    result.push('_'); // Mask the pipe
610                    i += 2;
611                } else {
612                    // Single backslash not followed by \ or | → just push it
613                    result.push(chars[i]);
614                    i += 1;
615                }
616            } else {
617                result.push(chars[i]);
618                i += 1;
619            }
620        }
621
622        result
623    }
624
625    /// Split a table row into individual cell contents with flavor-specific behavior.
626    ///
627    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
628    /// This is the foundation for both cell counting and cell content extraction.
629    ///
630    /// For Standard/GFM flavor, pipes in inline code ARE cell delimiters (matches GitHub).
631    /// For MkDocs flavor, pipes in inline code are NOT cell delimiters.
632    pub fn split_table_row_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> Vec<String> {
633        let trimmed = row.trim();
634
635        if !trimmed.contains('|') {
636            return Vec::new();
637        }
638
639        // First, mask escaped pipes (same for all flavors)
640        let masked = Self::mask_pipes_for_table_parsing(trimmed);
641
642        // For MkDocs flavor, also mask pipes inside inline code
643        let final_masked = if flavor == crate::config::MarkdownFlavor::MkDocs {
644            Self::mask_pipes_in_inline_code(&masked)
645        } else {
646            masked
647        };
648
649        let has_leading = final_masked.starts_with('|');
650        let has_trailing = final_masked.ends_with('|');
651
652        let mut masked_content = final_masked.as_str();
653        let mut orig_content = trimmed;
654
655        if has_leading {
656            masked_content = &masked_content[1..];
657            orig_content = &orig_content[1..];
658        }
659
660        // Track whether we actually strip a trailing pipe
661        let stripped_trailing = has_trailing && !masked_content.is_empty();
662        if stripped_trailing {
663            masked_content = &masked_content[..masked_content.len() - 1];
664            orig_content = &orig_content[..orig_content.len() - 1];
665        }
666
667        // Handle edge cases for degenerate inputs
668        if masked_content.is_empty() {
669            if stripped_trailing {
670                // "||" case: two pipes with empty content between = one empty cell
671                return vec![String::new()];
672            } else {
673                // "|" case: single pipe, not a valid table row
674                return Vec::new();
675            }
676        }
677
678        let masked_parts: Vec<&str> = masked_content.split('|').collect();
679        let mut cells = Vec::new();
680        let mut pos = 0;
681
682        for masked_cell in masked_parts {
683            let cell_len = masked_cell.len();
684            let orig_cell = if pos + cell_len <= orig_content.len() {
685                &orig_content[pos..pos + cell_len]
686            } else {
687                masked_cell
688            };
689            cells.push(orig_cell.to_string());
690            pos += cell_len + 1; // +1 for the pipe delimiter
691        }
692
693        cells
694    }
695
696    /// Split a table row into individual cell contents using Standard/GFM behavior.
697    pub fn split_table_row(row: &str) -> Vec<String> {
698        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
699    }
700
701    /// Determine the pipe style of a table row
702    ///
703    /// Handles tables inside blockquotes by stripping the blockquote prefix
704    /// before analyzing the pipe style.
705    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
706        // Strip blockquote prefix if present before analyzing pipe style
707        let content = Self::strip_blockquote_prefix(line);
708        let trimmed = content.trim();
709        if !trimmed.contains('|') {
710            return None;
711        }
712
713        let has_leading = trimmed.starts_with('|');
714        let has_trailing = trimmed.ends_with('|');
715
716        match (has_leading, has_trailing) {
717            (true, true) => Some("leading_and_trailing"),
718            (true, false) => Some("leading_only"),
719            (false, true) => Some("trailing_only"),
720            (false, false) => Some("no_leading_or_trailing"),
721        }
722    }
723
724    /// Extract blockquote prefix from a line, returning (prefix, content).
725    ///
726    /// This is useful for stripping the prefix before processing, then restoring it after.
727    /// For example: `"> | H1 | H2 |"` returns `("> ", "| H1 | H2 |")`.
728    pub fn extract_blockquote_prefix(line: &str) -> (&str, &str) {
729        // Find where the actual content starts (after blockquote markers and spaces)
730        let bytes = line.as_bytes();
731        let mut pos = 0;
732
733        // Skip leading whitespace (indent before blockquote marker)
734        while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
735            pos += 1;
736        }
737
738        // If no blockquote marker, return empty prefix
739        if pos >= bytes.len() || bytes[pos] != b'>' {
740            return ("", line);
741        }
742
743        // Skip all blockquote markers and spaces
744        while pos < bytes.len() {
745            if bytes[pos] == b'>' {
746                pos += 1;
747                // Skip optional space after >
748                if pos < bytes.len() && bytes[pos] == b' ' {
749                    pos += 1;
750                }
751            } else if bytes[pos] == b' ' || bytes[pos] == b'\t' {
752                pos += 1;
753            } else {
754                break;
755            }
756        }
757
758        // Split at the position where content starts
759        (&line[..pos], &line[pos..])
760    }
761
762    /// Extract list marker prefix from a line, returning (prefix, content, content_indent).
763    ///
764    /// This handles unordered list markers (`-`, `*`, `+`) and ordered list markers (`1.`, `10)`, etc.)
765    /// Returns:
766    /// - prefix: The list marker including any leading whitespace and trailing space (e.g., "- ", "  1. ")
767    /// - content: The content after the list marker
768    /// - content_indent: The number of spaces needed for continuation lines to align with content
769    ///
770    /// For example:
771    /// - `"- | H1 | H2 |"` returns `("- ", "| H1 | H2 |", 2)`
772    /// - `"1. | H1 | H2 |"` returns `("1. ", "| H1 | H2 |", 3)`
773    /// - `"  - table"` returns `("  - ", "table", 4)`
774    ///
775    /// Returns `("", line, 0)` if the line doesn't start with a list marker.
776    pub fn extract_list_prefix(line: &str) -> (&str, &str, usize) {
777        let bytes = line.as_bytes();
778
779        // Skip leading whitespace
780        let leading_spaces = bytes.iter().take_while(|&&b| b == b' ' || b == b'\t').count();
781        let mut pos = leading_spaces;
782
783        if pos >= bytes.len() {
784            return ("", line, 0);
785        }
786
787        // Check for unordered list marker: -, *, +
788        if matches!(bytes[pos], b'-' | b'*' | b'+') {
789            pos += 1;
790
791            // Must be followed by space or tab (or end of line for marker-only lines)
792            if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
793                // Skip the space after marker if present
794                if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
795                    pos += 1;
796                }
797                let content_indent = pos;
798                return (&line[..pos], &line[pos..], content_indent);
799            }
800            // Not a list marker (e.g., "-word" or "--")
801            return ("", line, 0);
802        }
803
804        // Check for ordered list marker: digits followed by . or ) then space
805        if bytes[pos].is_ascii_digit() {
806            let digit_start = pos;
807            while pos < bytes.len() && bytes[pos].is_ascii_digit() {
808                pos += 1;
809            }
810
811            // Must have at least one digit
812            if pos > digit_start && pos < bytes.len() {
813                // Check for . or ) followed by space/tab
814                if bytes[pos] == b'.' || bytes[pos] == b')' {
815                    pos += 1;
816                    if pos >= bytes.len() || bytes[pos] == b' ' || bytes[pos] == b'\t' {
817                        // Skip the space after marker if present
818                        if pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
819                            pos += 1;
820                        }
821                        let content_indent = pos;
822                        return (&line[..pos], &line[pos..], content_indent);
823                    }
824                }
825            }
826        }
827
828        ("", line, 0)
829    }
830
831    /// Extract the table row content from a line, stripping any list/blockquote prefix.
832    ///
833    /// This is useful for processing table rows that may be inside list items or blockquotes.
834    /// The line_index indicates which line of the table this is (0 = header, 1 = delimiter, etc.)
835    pub fn extract_table_row_content<'a>(line: &'a str, table_block: &TableBlock, line_index: usize) -> &'a str {
836        // First strip blockquote prefix
837        let (_, after_blockquote) = Self::extract_blockquote_prefix(line);
838
839        // Then handle list prefix if present
840        if let Some(ref list_ctx) = table_block.list_context {
841            if line_index == 0 {
842                // Header line: strip list prefix (handles both markers and indentation)
843                after_blockquote
844                    .strip_prefix(&list_ctx.list_prefix)
845                    .unwrap_or_else(|| Self::extract_list_prefix(after_blockquote).1)
846            } else {
847                // Continuation lines: strip indentation
848                Self::strip_list_continuation_indent(after_blockquote, list_ctx.content_indent)
849            }
850        } else {
851            after_blockquote
852        }
853    }
854
855    /// Check if the content after a list marker looks like a table row.
856    /// This is used to detect tables that start on the same line as a list marker.
857    pub fn is_list_item_with_table_row(line: &str) -> bool {
858        let (prefix, content, _) = Self::extract_list_prefix(line);
859        if prefix.is_empty() {
860            return false;
861        }
862
863        // Check if the content after the list marker is a table row
864        // It must start with | (proper table format within a list)
865        let trimmed = content.trim();
866        if !trimmed.starts_with('|') {
867            return false;
868        }
869
870        // Use our table row detection on the content
871        Self::is_potential_table_row_content(content)
872    }
873
874    /// Internal helper: Check if content (without list/blockquote prefix) looks like a table row.
875    fn is_potential_table_row_content(content: &str) -> bool {
876        let trimmed = content.trim();
877        if trimmed.is_empty() || !trimmed.contains('|') {
878            return false;
879        }
880
881        // Skip lines that are clearly code or inline code
882        if trimmed.starts_with('`') || trimmed.contains("``") {
883            return false;
884        }
885
886        // Must have at least 2 parts when split by |
887        let parts: Vec<&str> = trimmed.split('|').collect();
888        if parts.len() < 2 {
889            return false;
890        }
891
892        // Check if it looks like a table row by having reasonable content between pipes
893        let mut valid_parts = 0;
894        let mut total_non_empty_parts = 0;
895
896        for part in &parts {
897            let part_trimmed = part.trim();
898            if part_trimmed.is_empty() {
899                continue;
900            }
901            total_non_empty_parts += 1;
902
903            if !part_trimmed.contains('\n') {
904                valid_parts += 1;
905            }
906        }
907
908        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
909            return false;
910        }
911
912        if total_non_empty_parts == 0 {
913            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
914        }
915
916        if trimmed.starts_with('|') && trimmed.ends_with('|') {
917            valid_parts >= 1
918        } else {
919            valid_parts >= 2
920        }
921    }
922}
923
924#[cfg(test)]
925mod tests {
926    use super::*;
927    use crate::lint_context::LintContext;
928
929    #[test]
930    fn test_is_potential_table_row() {
931        // Basic valid table rows
932        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
933        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
934        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
935        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
936
937        // Multiple cells
938        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
939
940        // With whitespace
941        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
942        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
943
944        // Not table rows
945        assert!(!TableUtils::is_potential_table_row("- List item"));
946        assert!(!TableUtils::is_potential_table_row("* Another list"));
947        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
948        assert!(!TableUtils::is_potential_table_row("Regular text"));
949        assert!(!TableUtils::is_potential_table_row(""));
950        assert!(!TableUtils::is_potential_table_row("   "));
951
952        // Code blocks
953        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
954        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
955
956        // Single pipe not enough
957        assert!(!TableUtils::is_potential_table_row("Just one |"));
958        assert!(!TableUtils::is_potential_table_row("| Just one"));
959
960        // Very long cells are valid in tables (no length limit for cell content)
961        let long_cell = "a".repeat(150);
962        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
963
964        // Cells with newlines
965        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
966
967        // Empty cells (Issue #129)
968        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
969        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
970        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
971    }
972
973    #[test]
974    fn test_list_items_with_pipes_not_table_rows() {
975        // Ordered list items should NOT be detected as table rows
976        assert!(!TableUtils::is_potential_table_row("1. Item with | pipe"));
977        assert!(!TableUtils::is_potential_table_row("10. Item with | pipe"));
978        assert!(!TableUtils::is_potential_table_row("999. Item with | pipe"));
979        assert!(!TableUtils::is_potential_table_row("1) Item with | pipe"));
980        assert!(!TableUtils::is_potential_table_row("10) Item with | pipe"));
981
982        // Unordered list items with tabs
983        assert!(!TableUtils::is_potential_table_row("-\tItem with | pipe"));
984        assert!(!TableUtils::is_potential_table_row("*\tItem with | pipe"));
985        assert!(!TableUtils::is_potential_table_row("+\tItem with | pipe"));
986
987        // Indented list items (the trim_start normalizes indentation)
988        assert!(!TableUtils::is_potential_table_row("  - Indented | pipe"));
989        assert!(!TableUtils::is_potential_table_row("    * Deep indent | pipe"));
990        assert!(!TableUtils::is_potential_table_row("  1. Ordered indent | pipe"));
991
992        // Task list items
993        assert!(!TableUtils::is_potential_table_row("- [ ] task | pipe"));
994        assert!(!TableUtils::is_potential_table_row("- [x] done | pipe"));
995
996        // Multiple pipes in list items
997        assert!(!TableUtils::is_potential_table_row("1. foo | bar | baz"));
998        assert!(!TableUtils::is_potential_table_row("- alpha | beta | gamma"));
999
1000        // These SHOULD still be detected as potential table rows
1001        assert!(TableUtils::is_potential_table_row("| cell | cell |"));
1002        assert!(TableUtils::is_potential_table_row("cell | cell"));
1003        assert!(TableUtils::is_potential_table_row("| Header | Header |"));
1004    }
1005
1006    #[test]
1007    fn test_is_delimiter_row() {
1008        // Basic delimiter rows
1009        assert!(TableUtils::is_delimiter_row("|---|---|"));
1010        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
1011        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
1012        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
1013
1014        // With varying dash counts
1015        assert!(TableUtils::is_delimiter_row("|-|--|"));
1016        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
1017
1018        // With whitespace
1019        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
1020        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
1021
1022        // Multiple columns
1023        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
1024
1025        // Without leading/trailing pipes
1026        assert!(TableUtils::is_delimiter_row("--- | ---"));
1027        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
1028
1029        // Not delimiter rows
1030        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
1031        assert!(!TableUtils::is_delimiter_row("Regular text"));
1032        assert!(!TableUtils::is_delimiter_row(""));
1033        assert!(!TableUtils::is_delimiter_row("|||"));
1034        assert!(!TableUtils::is_delimiter_row("| | |"));
1035
1036        // Must have dashes
1037        assert!(!TableUtils::is_delimiter_row("| : | : |"));
1038        assert!(!TableUtils::is_delimiter_row("|    |    |"));
1039
1040        // Mixed content
1041        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
1042        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
1043    }
1044
1045    #[test]
1046    fn test_count_cells() {
1047        // Basic counts
1048        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
1049        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
1050        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
1051        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
1052
1053        // Single cell
1054        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
1055        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
1056
1057        // Empty cells
1058        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
1059        assert_eq!(TableUtils::count_cells("| | | |"), 3);
1060
1061        // Many cells
1062        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
1063
1064        // Edge cases
1065        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
1066        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
1067
1068        // No table
1069        assert_eq!(TableUtils::count_cells("Regular text"), 0);
1070        assert_eq!(TableUtils::count_cells(""), 0);
1071        assert_eq!(TableUtils::count_cells("   "), 0);
1072
1073        // Whitespace handling
1074        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
1075        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
1076    }
1077
1078    #[test]
1079    fn test_count_cells_with_escaped_pipes() {
1080        // In GFM tables, escape handling happens BEFORE cell splitting.
1081        // Inline code does NOT protect pipes - they still act as cell delimiters.
1082        // To include a literal pipe in a table cell, you MUST escape it with \|
1083
1084        // Basic table structure
1085        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
1086        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
1087        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
1088
1089        // Escaped pipes: \| keeps the pipe as content
1090        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
1091        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
1092
1093        // Escaped pipes inside backticks (correct way to include | in code in tables)
1094        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
1095
1096        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
1097        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
1098        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 3); // Same inside code
1099
1100        // IMPORTANT: Bare pipes in inline code DO act as delimiters (GFM behavior)
1101        // This matches GitHub's actual rendering where `a | b` splits into two cells
1102        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 3);
1103        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 4);
1104        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 2);
1105
1106        // The regex example from Issue #34 - pipes in regex patterns need escaping
1107        // Unescaped: `^([0-1]?\d|2[0-3])` has a bare | which splits cells
1108        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 3);
1109        // Escaped: `^([0-1]?\d\|2[0-3])` keeps the | as part of the regex
1110        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
1111    }
1112
1113    #[test]
1114    fn test_determine_pipe_style() {
1115        // All pipe styles
1116        assert_eq!(
1117            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
1118            Some("leading_and_trailing")
1119        );
1120        assert_eq!(
1121            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
1122            Some("leading_only")
1123        );
1124        assert_eq!(
1125            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
1126            Some("trailing_only")
1127        );
1128        assert_eq!(
1129            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
1130            Some("no_leading_or_trailing")
1131        );
1132
1133        // With whitespace
1134        assert_eq!(
1135            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
1136            Some("leading_and_trailing")
1137        );
1138        assert_eq!(
1139            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
1140            Some("leading_only")
1141        );
1142
1143        // No pipes
1144        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
1145        assert_eq!(TableUtils::determine_pipe_style(""), None);
1146        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1147
1148        // Single pipe cases
1149        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
1150        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
1151        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
1152    }
1153
1154    #[test]
1155    fn test_find_table_blocks_simple() {
1156        let content = "| Header 1 | Header 2 |
1157|-----------|-----------|
1158| Cell 1    | Cell 2    |
1159| Cell 3    | Cell 4    |";
1160
1161        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1162
1163        let tables = TableUtils::find_table_blocks(content, &ctx);
1164        assert_eq!(tables.len(), 1);
1165
1166        let table = &tables[0];
1167        assert_eq!(table.start_line, 0);
1168        assert_eq!(table.end_line, 3);
1169        assert_eq!(table.header_line, 0);
1170        assert_eq!(table.delimiter_line, 1);
1171        assert_eq!(table.content_lines, vec![2, 3]);
1172    }
1173
1174    #[test]
1175    fn test_find_table_blocks_multiple() {
1176        let content = "Some text
1177
1178| Table 1 | Col A |
1179|----------|-------|
1180| Data 1   | Val 1 |
1181
1182More text
1183
1184| Table 2 | Col 2 |
1185|----------|-------|
1186| Data 2   | Data  |";
1187
1188        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1189
1190        let tables = TableUtils::find_table_blocks(content, &ctx);
1191        assert_eq!(tables.len(), 2);
1192
1193        // First table
1194        assert_eq!(tables[0].start_line, 2);
1195        assert_eq!(tables[0].end_line, 4);
1196        assert_eq!(tables[0].header_line, 2);
1197        assert_eq!(tables[0].delimiter_line, 3);
1198        assert_eq!(tables[0].content_lines, vec![4]);
1199
1200        // Second table
1201        assert_eq!(tables[1].start_line, 8);
1202        assert_eq!(tables[1].end_line, 10);
1203        assert_eq!(tables[1].header_line, 8);
1204        assert_eq!(tables[1].delimiter_line, 9);
1205        assert_eq!(tables[1].content_lines, vec![10]);
1206    }
1207
1208    #[test]
1209    fn test_find_table_blocks_no_content_rows() {
1210        let content = "| Header 1 | Header 2 |
1211|-----------|-----------|
1212
1213Next paragraph";
1214
1215        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1216
1217        let tables = TableUtils::find_table_blocks(content, &ctx);
1218        assert_eq!(tables.len(), 1);
1219
1220        let table = &tables[0];
1221        assert_eq!(table.start_line, 0);
1222        assert_eq!(table.end_line, 1); // Just header and delimiter
1223        assert_eq!(table.content_lines.len(), 0);
1224    }
1225
1226    #[test]
1227    fn test_find_table_blocks_in_code_block() {
1228        let content = "```
1229| Not | A | Table |
1230|-----|---|-------|
1231| In  | Code | Block |
1232```
1233
1234| Real | Table |
1235|------|-------|
1236| Data | Here  |";
1237
1238        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1239
1240        let tables = TableUtils::find_table_blocks(content, &ctx);
1241        assert_eq!(tables.len(), 1); // Only the table outside code block
1242
1243        let table = &tables[0];
1244        assert_eq!(table.header_line, 6);
1245        assert_eq!(table.delimiter_line, 7);
1246    }
1247
1248    #[test]
1249    fn test_find_table_blocks_no_tables() {
1250        let content = "Just regular text
1251No tables here
1252- List item with | pipe
1253* Another list item";
1254
1255        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1256
1257        let tables = TableUtils::find_table_blocks(content, &ctx);
1258        assert_eq!(tables.len(), 0);
1259    }
1260
1261    #[test]
1262    fn test_find_table_blocks_malformed() {
1263        let content = "| Header without delimiter |
1264| This looks like table |
1265But no delimiter row
1266
1267| Proper | Table |
1268|---------|-------|
1269| Data    | Here  |";
1270
1271        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1272
1273        let tables = TableUtils::find_table_blocks(content, &ctx);
1274        assert_eq!(tables.len(), 1); // Only the proper table
1275        assert_eq!(tables[0].header_line, 4);
1276    }
1277
1278    #[test]
1279    fn test_edge_cases() {
1280        // Test empty content
1281        assert!(!TableUtils::is_potential_table_row(""));
1282        assert!(!TableUtils::is_delimiter_row(""));
1283        assert_eq!(TableUtils::count_cells(""), 0);
1284        assert_eq!(TableUtils::determine_pipe_style(""), None);
1285
1286        // Test whitespace only
1287        assert!(!TableUtils::is_potential_table_row("   "));
1288        assert!(!TableUtils::is_delimiter_row("   "));
1289        assert_eq!(TableUtils::count_cells("   "), 0);
1290        assert_eq!(TableUtils::determine_pipe_style("   "), None);
1291
1292        // Test single character
1293        assert!(!TableUtils::is_potential_table_row("|"));
1294        assert!(!TableUtils::is_delimiter_row("|"));
1295        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
1296
1297        // Test very long lines are valid table rows (no length limit)
1298        // Test both single-column and multi-column long lines
1299        let long_single = format!("| {} |", "a".repeat(200));
1300        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
1301
1302        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
1303        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
1304
1305        // Test unicode
1306        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
1307        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
1308        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
1309    }
1310
1311    #[test]
1312    fn test_table_block_struct() {
1313        let block = TableBlock {
1314            start_line: 0,
1315            end_line: 5,
1316            header_line: 0,
1317            delimiter_line: 1,
1318            content_lines: vec![2, 3, 4, 5],
1319            list_context: None,
1320        };
1321
1322        // Test Debug trait
1323        let debug_str = format!("{block:?}");
1324        assert!(debug_str.contains("TableBlock"));
1325        assert!(debug_str.contains("start_line: 0"));
1326
1327        // Test Clone trait
1328        let cloned = block.clone();
1329        assert_eq!(cloned.start_line, block.start_line);
1330        assert_eq!(cloned.end_line, block.end_line);
1331        assert_eq!(cloned.header_line, block.header_line);
1332        assert_eq!(cloned.delimiter_line, block.delimiter_line);
1333        assert_eq!(cloned.content_lines, block.content_lines);
1334        assert!(cloned.list_context.is_none());
1335    }
1336
1337    #[test]
1338    fn test_split_table_row() {
1339        // Basic split
1340        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
1341        assert_eq!(cells.len(), 3);
1342        assert_eq!(cells[0].trim(), "Cell 1");
1343        assert_eq!(cells[1].trim(), "Cell 2");
1344        assert_eq!(cells[2].trim(), "Cell 3");
1345
1346        // Without trailing pipe
1347        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
1348        assert_eq!(cells.len(), 2);
1349
1350        // Empty cells
1351        let cells = TableUtils::split_table_row("| | | |");
1352        assert_eq!(cells.len(), 3);
1353
1354        // Single cell
1355        let cells = TableUtils::split_table_row("| Cell |");
1356        assert_eq!(cells.len(), 1);
1357        assert_eq!(cells[0].trim(), "Cell");
1358
1359        // No pipes
1360        let cells = TableUtils::split_table_row("No pipes here");
1361        assert_eq!(cells.len(), 0);
1362    }
1363
1364    #[test]
1365    fn test_split_table_row_with_escaped_pipes() {
1366        // Escaped pipes should be preserved in cell content
1367        let cells = TableUtils::split_table_row(r"| A | B \| C |");
1368        assert_eq!(cells.len(), 2);
1369        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
1370
1371        // Double backslash + pipe is NOT escaped
1372        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
1373        assert_eq!(cells.len(), 3);
1374    }
1375
1376    #[test]
1377    fn test_split_table_row_with_flavor_mkdocs() {
1378        // MkDocs flavor: pipes in inline code are NOT cell delimiters
1379        let cells =
1380            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
1381        assert_eq!(cells.len(), 2);
1382        assert!(
1383            cells[1].contains("`x | y`"),
1384            "Inline code with pipe should be single cell in MkDocs flavor"
1385        );
1386
1387        // Multiple pipes in inline code
1388        let cells =
1389            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
1390        assert_eq!(cells.len(), 2);
1391        assert!(cells[1].contains("`a | b | c`"));
1392    }
1393
1394    #[test]
1395    fn test_split_table_row_with_flavor_standard() {
1396        // Standard/GFM flavor: pipes in inline code ARE cell delimiters
1397        let cells =
1398            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
1399        // In GFM, `x | y` splits into separate cells
1400        assert_eq!(cells.len(), 3);
1401    }
1402
1403    // === extract_blockquote_prefix tests ===
1404
1405    #[test]
1406    fn test_extract_blockquote_prefix_no_blockquote() {
1407        // Regular table row without blockquote
1408        let (prefix, content) = TableUtils::extract_blockquote_prefix("| H1 | H2 |");
1409        assert_eq!(prefix, "");
1410        assert_eq!(content, "| H1 | H2 |");
1411    }
1412
1413    #[test]
1414    fn test_extract_blockquote_prefix_single_level() {
1415        // Single blockquote level
1416        let (prefix, content) = TableUtils::extract_blockquote_prefix("> | H1 | H2 |");
1417        assert_eq!(prefix, "> ");
1418        assert_eq!(content, "| H1 | H2 |");
1419    }
1420
1421    #[test]
1422    fn test_extract_blockquote_prefix_double_level() {
1423        // Double blockquote level
1424        let (prefix, content) = TableUtils::extract_blockquote_prefix(">> | H1 | H2 |");
1425        assert_eq!(prefix, ">> ");
1426        assert_eq!(content, "| H1 | H2 |");
1427    }
1428
1429    #[test]
1430    fn test_extract_blockquote_prefix_triple_level() {
1431        // Triple blockquote level
1432        let (prefix, content) = TableUtils::extract_blockquote_prefix(">>> | H1 | H2 |");
1433        assert_eq!(prefix, ">>> ");
1434        assert_eq!(content, "| H1 | H2 |");
1435    }
1436
1437    #[test]
1438    fn test_extract_blockquote_prefix_with_spaces() {
1439        // Blockquote with spaces between markers
1440        let (prefix, content) = TableUtils::extract_blockquote_prefix("> > | H1 | H2 |");
1441        assert_eq!(prefix, "> > ");
1442        assert_eq!(content, "| H1 | H2 |");
1443    }
1444
1445    #[test]
1446    fn test_extract_blockquote_prefix_indented() {
1447        // Indented blockquote
1448        let (prefix, content) = TableUtils::extract_blockquote_prefix("  > | H1 | H2 |");
1449        assert_eq!(prefix, "  > ");
1450        assert_eq!(content, "| H1 | H2 |");
1451    }
1452
1453    #[test]
1454    fn test_extract_blockquote_prefix_no_space_after() {
1455        // Blockquote without space after marker
1456        let (prefix, content) = TableUtils::extract_blockquote_prefix(">| H1 | H2 |");
1457        assert_eq!(prefix, ">");
1458        assert_eq!(content, "| H1 | H2 |");
1459    }
1460
1461    #[test]
1462    fn test_determine_pipe_style_in_blockquote() {
1463        // determine_pipe_style should handle blockquotes correctly
1464        assert_eq!(
1465            TableUtils::determine_pipe_style("> | H1 | H2 |"),
1466            Some("leading_and_trailing")
1467        );
1468        assert_eq!(
1469            TableUtils::determine_pipe_style("> H1 | H2"),
1470            Some("no_leading_or_trailing")
1471        );
1472        assert_eq!(
1473            TableUtils::determine_pipe_style(">> | H1 | H2 |"),
1474            Some("leading_and_trailing")
1475        );
1476        assert_eq!(TableUtils::determine_pipe_style(">>> | H1 | H2"), Some("leading_only"));
1477    }
1478
1479    #[test]
1480    fn test_list_table_delimiter_requires_indentation() {
1481        // Test case: list item contains pipe, but delimiter line is at column 1
1482        // This should NOT be detected as a list table since the delimiter has no indentation.
1483        // The result is a non-list table starting at line 0 (the list item becomes the header)
1484        // but list_context should be None.
1485        let content = "- List item with | pipe\n|---|---|\n| Cell 1 | Cell 2 |";
1486        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1487        let tables = TableUtils::find_table_blocks(content, &ctx);
1488
1489        // The table will be detected starting at line 0, but crucially it should NOT have
1490        // list_context set, meaning it won't be treated as a list-table for column count purposes
1491        assert_eq!(tables.len(), 1, "Should find exactly one table");
1492        assert!(
1493            tables[0].list_context.is_none(),
1494            "Should NOT have list context since delimiter has no indentation"
1495        );
1496    }
1497
1498    #[test]
1499    fn test_list_table_with_properly_indented_delimiter() {
1500        // Test case: list item with table header, delimiter properly indented
1501        // This SHOULD be detected as a list table
1502        let content = "- | Header 1 | Header 2 |\n  |----------|----------|\n  | Cell 1   | Cell 2   |";
1503        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1504        let tables = TableUtils::find_table_blocks(content, &ctx);
1505
1506        // Should find exactly one list-table starting at line 0
1507        assert_eq!(tables.len(), 1, "Should find exactly one table");
1508        assert_eq!(tables[0].start_line, 0, "Table should start at list item line");
1509        assert!(
1510            tables[0].list_context.is_some(),
1511            "Should be a list table since delimiter is properly indented"
1512        );
1513    }
1514}