rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).
/// Represents a table block in the document
///
/// As populated by `TableUtils::find_table_blocks_with_code_info`, every field
/// holds zero-based indices into the sequence produced by `content.lines()`.
#[derive(Debug, Clone)]
pub struct TableBlock {
    /// Index of the first line of the table (the same line as `header_line`).
    pub start_line: usize,
    /// Index of the last line of the table, inclusive. Equals `delimiter_line`
    /// when the table has no body rows.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row (the line directly after the header).
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in document order.
    pub content_lines: Vec<usize>,
}
14
/// Shared table detection utilities
///
/// Unit struct used purely as a namespace: it carries no state, and its
/// helpers are associated functions that do not take `self`.
pub struct TableUtils;
17
18impl TableUtils {
19    /// Check if a line looks like a potential table row
20    pub fn is_potential_table_row(line: &str) -> bool {
21        let trimmed = line.trim();
22        if trimmed.is_empty() || !trimmed.contains('|') {
23            return false;
24        }
25
26        // Skip lines that are clearly not table rows
27        if trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") {
28            return false;
29        }
30
31        // Skip lines that are clearly code or inline code
32        if trimmed.starts_with("`") || trimmed.contains("``") {
33            return false;
34        }
35
36        // Must have at least 2 parts when split by |
37        let parts: Vec<&str> = trimmed.split('|').collect();
38        if parts.len() < 2 {
39            return false;
40        }
41
42        // Check if it looks like a table row by having reasonable content between pipes
43        let mut valid_parts = 0;
44        let mut total_non_empty_parts = 0;
45
46        for part in &parts {
47            let part_trimmed = part.trim();
48            // Skip empty parts (from leading/trailing pipes)
49            if part_trimmed.is_empty() {
50                continue;
51            }
52            total_non_empty_parts += 1;
53
54            // Count parts that look like table cells (reasonable content, no newlines)
55            if !part_trimmed.contains('\n') {
56                valid_parts += 1;
57            }
58        }
59
60        // Check if all non-empty parts are valid (no newlines)
61        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
62            // Some cells contain newlines, not a valid table row
63            return false;
64        }
65
66        // GFM allows tables with all empty cells (e.g., |||)
67        // These are valid if they have proper table formatting (leading and trailing pipes)
68        if total_non_empty_parts == 0 {
69            // Empty cells are only valid with proper pipe formatting
70            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
71        }
72
73        // GFM allows single-column tables, so >= 1 valid part is enough
74        // when the line has proper table formatting (pipes)
75        if trimmed.starts_with('|') && trimmed.ends_with('|') {
76            // Properly formatted table row with pipes on both ends
77            valid_parts >= 1
78        } else {
79            // For rows without proper pipe formatting, require at least 2 cells
80            valid_parts >= 2
81        }
82    }
83
84    /// Check if a line is a table delimiter row (e.g., |---|---|)
85    pub fn is_delimiter_row(line: &str) -> bool {
86        let trimmed = line.trim();
87        if !trimmed.contains('|') || !trimmed.contains('-') {
88            return false;
89        }
90
91        // Split by pipes and check each part
92        let parts: Vec<&str> = trimmed.split('|').collect();
93        let mut valid_delimiter_parts = 0;
94        let mut total_non_empty_parts = 0;
95
96        for part in &parts {
97            let part_trimmed = part.trim();
98            if part_trimmed.is_empty() {
99                continue; // Skip empty parts from leading/trailing pipes
100            }
101
102            total_non_empty_parts += 1;
103
104            // Check if this part looks like a delimiter (contains dashes and optionally colons)
105            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
106                valid_delimiter_parts += 1;
107            }
108        }
109
110        // All non-empty parts must be valid delimiters, and there must be at least one
111        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
112    }
113
114    /// Find all table blocks in the content with optimized detection
115    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
116    pub fn find_table_blocks_with_code_info(
117        content: &str,
118        code_blocks: &[(usize, usize)],
119        code_spans: &[crate::lint_context::CodeSpan],
120        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
121    ) -> Vec<TableBlock> {
122        let lines: Vec<&str> = content.lines().collect();
123        let mut tables = Vec::new();
124        let mut i = 0;
125
126        // Pre-compute line positions for efficient code block checking
127        let mut line_positions = Vec::with_capacity(lines.len());
128        let mut pos = 0;
129        for line in &lines {
130            line_positions.push(pos);
131            pos += line.len() + 1; // +1 for newline
132        }
133
134        while i < lines.len() {
135            // Skip lines in code blocks, code spans, or HTML comments
136            let line_start = line_positions[i];
137            let in_code =
138                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
139                    || code_spans
140                        .iter()
141                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
142            let in_html_comment = html_comment_ranges
143                .iter()
144                .any(|range| line_start >= range.start && line_start < range.end);
145
146            if in_code || in_html_comment {
147                i += 1;
148                continue;
149            }
150
151            // Look for potential table start
152            if Self::is_potential_table_row(lines[i]) {
153                // Check if the next line is a delimiter row
154                if i + 1 < lines.len() && Self::is_delimiter_row(lines[i + 1]) {
155                    // Found a table! Find its end
156                    let table_start = i;
157                    let header_line = i;
158                    let delimiter_line = i + 1;
159                    let mut table_end = i + 1; // Include the delimiter row
160                    let mut content_lines = Vec::new();
161
162                    // Continue while we have table rows
163                    let mut j = i + 2;
164                    while j < lines.len() {
165                        let line = lines[j];
166                        if line.trim().is_empty() {
167                            // Empty line ends the table
168                            break;
169                        }
170                        if Self::is_potential_table_row(line) {
171                            content_lines.push(j);
172                            table_end = j;
173                            j += 1;
174                        } else {
175                            // Non-table line ends the table
176                            break;
177                        }
178                    }
179
180                    tables.push(TableBlock {
181                        start_line: table_start,
182                        end_line: table_end,
183                        header_line,
184                        delimiter_line,
185                        content_lines,
186                    });
187                    i = table_end + 1;
188                } else {
189                    i += 1;
190                }
191            } else {
192                i += 1;
193            }
194        }
195
196        tables
197    }
198
199    /// Find all table blocks in the content with optimized detection
200    /// This is a backward-compatible wrapper that accepts LintContext
201    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
202        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
203    }
204
205    /// Count the number of cells in a table row
206    /// Uses Standard/GFM behavior where pipes in inline code ARE cell delimiters
207    pub fn count_cells(row: &str) -> usize {
208        Self::count_cells_with_flavor(row, crate::config::MarkdownFlavor::Standard)
209    }
210
211    /// Count the number of cells in a table row with flavor-specific behavior
212    ///
213    /// Different Markdown flavors handle pipes inside inline code differently:
214    /// - Standard/GFM: Pipes in backticks ARE cell delimiters (GitHub behavior)
215    /// - MkDocs: Pipes in backticks are NOT cell delimiters (Python-Markdown behavior)
216    ///
217    /// This difference is due to Python-Markdown (used by MkDocs) fixing the parsing
218    /// to handle inline code spans before splitting by pipes, while GitHub GFM
219    /// splits by pipes first.
220    pub fn count_cells_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> usize {
221        Self::split_table_row_with_flavor(row, flavor).len()
222    }
223
224    /// Mask pipes inside inline code blocks with a placeholder character
225    pub fn mask_pipes_in_inline_code(text: &str) -> String {
226        let mut result = String::new();
227        let chars: Vec<char> = text.chars().collect();
228        let mut i = 0;
229
230        while i < chars.len() {
231            if chars[i] == '`' {
232                // Count consecutive backticks at start
233                let start = i;
234                let mut backtick_count = 0;
235                while i < chars.len() && chars[i] == '`' {
236                    backtick_count += 1;
237                    i += 1;
238                }
239
240                // Look for matching closing backticks
241                let mut found_closing = false;
242                let mut j = i;
243
244                while j < chars.len() {
245                    if chars[j] == '`' {
246                        // Count potential closing backticks
247                        let close_start = j;
248                        let mut close_count = 0;
249                        while j < chars.len() && chars[j] == '`' {
250                            close_count += 1;
251                            j += 1;
252                        }
253
254                        if close_count == backtick_count {
255                            // Found matching closing backticks
256                            found_closing = true;
257
258                            // Valid inline code - add with pipes masked
259                            result.extend(chars[start..i].iter());
260
261                            for &ch in chars.iter().take(close_start).skip(i) {
262                                if ch == '|' {
263                                    result.push('_'); // Mask pipe with underscore
264                                } else {
265                                    result.push(ch);
266                                }
267                            }
268
269                            result.extend(chars[close_start..j].iter());
270                            i = j;
271                            break;
272                        }
273                        // If not matching, continue searching (j is already past these backticks)
274                    } else {
275                        j += 1;
276                    }
277                }
278
279                if !found_closing {
280                    // No matching closing found, treat as regular text
281                    result.extend(chars[start..i].iter());
282                }
283            } else {
284                result.push(chars[i]);
285                i += 1;
286            }
287        }
288
289        result
290    }
291
292    /// Mask escaped pipes for accurate table cell parsing
293    ///
294    /// In GFM tables, escape handling happens BEFORE cell boundary detection:
295    /// - `\|` → escaped pipe → masked (stays as cell content)
296    /// - `\\|` → escaped backslash + pipe → NOT masked (pipe is a delimiter)
297    ///
298    /// IMPORTANT: Inline code spans do NOT protect pipes in GFM tables!
299    /// The pipe in `` `a | b` `` still acts as a cell delimiter, splitting into
300    /// two cells: `` `a `` and ` b` ``. This matches GitHub's actual rendering.
301    ///
302    /// To include a literal pipe in a table cell (even in code), you must escape it:
303    /// `` `a \| b` `` → single cell containing `a | b` (with code formatting)
304    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
305        let mut result = String::new();
306        let chars: Vec<char> = text.chars().collect();
307        let mut i = 0;
308
309        while i < chars.len() {
310            if chars[i] == '\\' {
311                if i + 1 < chars.len() && chars[i + 1] == '\\' {
312                    // Escaped backslash: \\ → push both and continue
313                    // The next character (if it's a pipe) will be a real delimiter
314                    result.push('\\');
315                    result.push('\\');
316                    i += 2;
317                } else if i + 1 < chars.len() && chars[i + 1] == '|' {
318                    // Escaped pipe: \| → mask the pipe
319                    result.push('\\');
320                    result.push('_'); // Mask the pipe
321                    i += 2;
322                } else {
323                    // Single backslash not followed by \ or | → just push it
324                    result.push(chars[i]);
325                    i += 1;
326                }
327            } else {
328                result.push(chars[i]);
329                i += 1;
330            }
331        }
332
333        result
334    }
335
336    /// Split a table row into individual cell contents with flavor-specific behavior.
337    ///
338    /// Returns a Vec of cell content strings (not trimmed - preserves original spacing).
339    /// This is the foundation for both cell counting and cell content extraction.
340    ///
341    /// Different Markdown flavors handle pipes inside inline code differently:
342    /// - Standard/GFM: Pipes in backticks ARE cell delimiters (GitHub behavior)
343    /// - MkDocs: Pipes in backticks are NOT cell delimiters (Python-Markdown behavior)
344    pub fn split_table_row_with_flavor(row: &str, flavor: crate::config::MarkdownFlavor) -> Vec<String> {
345        let trimmed = row.trim();
346
347        if !trimmed.contains('|') {
348            return Vec::new();
349        }
350
351        // First, mask escaped pipes (same for all flavors)
352        let masked = Self::mask_pipes_for_table_parsing(trimmed);
353
354        // For MkDocs flavor, also mask pipes inside inline code
355        let final_masked = if flavor == crate::config::MarkdownFlavor::MkDocs {
356            Self::mask_pipes_in_inline_code(&masked)
357        } else {
358            masked
359        };
360
361        let has_leading = final_masked.starts_with('|');
362        let has_trailing = final_masked.ends_with('|');
363
364        let mut masked_content = final_masked.as_str();
365        let mut orig_content = trimmed;
366
367        if has_leading {
368            masked_content = &masked_content[1..];
369            orig_content = &orig_content[1..];
370        }
371
372        // Track whether we actually strip a trailing pipe
373        let stripped_trailing = has_trailing && !masked_content.is_empty();
374        if stripped_trailing {
375            masked_content = &masked_content[..masked_content.len() - 1];
376            orig_content = &orig_content[..orig_content.len() - 1];
377        }
378
379        // Handle edge cases for degenerate inputs
380        if masked_content.is_empty() {
381            if stripped_trailing {
382                // "||" case: two pipes with empty content between = one empty cell
383                return vec![String::new()];
384            } else {
385                // "|" case: single pipe, not a valid table row
386                return Vec::new();
387            }
388        }
389
390        let masked_parts: Vec<&str> = masked_content.split('|').collect();
391        let mut cells = Vec::new();
392        let mut pos = 0;
393
394        for masked_cell in masked_parts {
395            let cell_len = masked_cell.len();
396            let orig_cell = if pos + cell_len <= orig_content.len() {
397                &orig_content[pos..pos + cell_len]
398            } else {
399                masked_cell
400            };
401            cells.push(orig_cell.to_string());
402            pos += cell_len + 1; // +1 for the pipe delimiter
403        }
404
405        cells
406    }
407
408    /// Split a table row into individual cell contents using Standard/GFM behavior.
409    pub fn split_table_row(row: &str) -> Vec<String> {
410        Self::split_table_row_with_flavor(row, crate::config::MarkdownFlavor::Standard)
411    }
412
413    /// Determine the pipe style of a table row
414    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
415        let trimmed = line.trim();
416        if !trimmed.contains('|') {
417            return None;
418        }
419
420        let has_leading = trimmed.starts_with('|');
421        let has_trailing = trimmed.ends_with('|');
422
423        match (has_leading, has_trailing) {
424            (true, true) => Some("leading_and_trailing"),
425            (true, false) => Some("leading_only"),
426            (false, true) => Some("trailing_only"),
427            (false, false) => Some("no_leading_or_trailing"),
428        }
429    }
430}
431
432#[cfg(test)]
433mod tests {
434    use super::*;
435    use crate::lint_context::LintContext;
436
437    #[test]
438    fn test_is_potential_table_row() {
439        // Basic valid table rows
440        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
441        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
442        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
443        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
444
445        // Multiple cells
446        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
447
448        // With whitespace
449        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
450        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
451
452        // Not table rows
453        assert!(!TableUtils::is_potential_table_row("- List item"));
454        assert!(!TableUtils::is_potential_table_row("* Another list"));
455        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
456        assert!(!TableUtils::is_potential_table_row("Regular text"));
457        assert!(!TableUtils::is_potential_table_row(""));
458        assert!(!TableUtils::is_potential_table_row("   "));
459
460        // Code blocks
461        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
462        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
463
464        // Single pipe not enough
465        assert!(!TableUtils::is_potential_table_row("Just one |"));
466        assert!(!TableUtils::is_potential_table_row("| Just one"));
467
468        // Very long cells are valid in tables (no length limit for cell content)
469        let long_cell = "a".repeat(150);
470        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
471
472        // Cells with newlines
473        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
474
475        // Empty cells (Issue #129)
476        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
477        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
478        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
479    }
480
481    #[test]
482    fn test_is_delimiter_row() {
483        // Basic delimiter rows
484        assert!(TableUtils::is_delimiter_row("|---|---|"));
485        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
486        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
487        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
488
489        // With varying dash counts
490        assert!(TableUtils::is_delimiter_row("|-|--|"));
491        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
492
493        // With whitespace
494        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
495        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
496
497        // Multiple columns
498        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
499
500        // Without leading/trailing pipes
501        assert!(TableUtils::is_delimiter_row("--- | ---"));
502        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
503
504        // Not delimiter rows
505        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
506        assert!(!TableUtils::is_delimiter_row("Regular text"));
507        assert!(!TableUtils::is_delimiter_row(""));
508        assert!(!TableUtils::is_delimiter_row("|||"));
509        assert!(!TableUtils::is_delimiter_row("| | |"));
510
511        // Must have dashes
512        assert!(!TableUtils::is_delimiter_row("| : | : |"));
513        assert!(!TableUtils::is_delimiter_row("|    |    |"));
514
515        // Mixed content
516        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
517        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
518    }
519
520    #[test]
521    fn test_count_cells() {
522        // Basic counts
523        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
524        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
525        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
526        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
527
528        // Single cell
529        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
530        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
531
532        // Empty cells
533        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
534        assert_eq!(TableUtils::count_cells("| | | |"), 3);
535
536        // Many cells
537        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
538
539        // Edge cases
540        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
541        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
542
543        // No table
544        assert_eq!(TableUtils::count_cells("Regular text"), 0);
545        assert_eq!(TableUtils::count_cells(""), 0);
546        assert_eq!(TableUtils::count_cells("   "), 0);
547
548        // Whitespace handling
549        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
550        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
551    }
552
553    #[test]
554    fn test_count_cells_with_escaped_pipes() {
555        // In GFM tables, escape handling happens BEFORE cell splitting.
556        // Inline code does NOT protect pipes - they still act as cell delimiters.
557        // To include a literal pipe in a table cell, you MUST escape it with \|
558
559        // Basic table structure
560        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
561        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
562        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
563
564        // Escaped pipes: \| keeps the pipe as content
565        assert_eq!(TableUtils::count_cells(r"| Command | echo \| grep |"), 2);
566        assert_eq!(TableUtils::count_cells(r"| A | B \| C |"), 2); // B | C is one cell
567
568        // Escaped pipes inside backticks (correct way to include | in code in tables)
569        assert_eq!(TableUtils::count_cells(r"| Command | `echo \| grep` |"), 2);
570
571        // Double backslash + pipe: \\| means escaped backslash followed by pipe delimiter
572        assert_eq!(TableUtils::count_cells(r"| A | B \\| C |"), 3); // \\| is NOT escaped pipe
573        assert_eq!(TableUtils::count_cells(r"| A | `B \\| C` |"), 3); // Same inside code
574
575        // IMPORTANT: Bare pipes in inline code DO act as delimiters (GFM behavior)
576        // This matches GitHub's actual rendering where `a | b` splits into two cells
577        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 3);
578        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 4);
579        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 2);
580
581        // The regex example from Issue #34 - pipes in regex patterns need escaping
582        // Unescaped: `^([0-1]?\d|2[0-3])` has a bare | which splits cells
583        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d|2[0-3])` |"), 3);
584        // Escaped: `^([0-1]?\d\|2[0-3])` keeps the | as part of the regex
585        assert_eq!(TableUtils::count_cells(r"| Hour formats | `^([0-1]?\d\|2[0-3])` |"), 2);
586    }
587
588    #[test]
589    fn test_determine_pipe_style() {
590        // All pipe styles
591        assert_eq!(
592            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
593            Some("leading_and_trailing")
594        );
595        assert_eq!(
596            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
597            Some("leading_only")
598        );
599        assert_eq!(
600            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
601            Some("trailing_only")
602        );
603        assert_eq!(
604            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
605            Some("no_leading_or_trailing")
606        );
607
608        // With whitespace
609        assert_eq!(
610            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
611            Some("leading_and_trailing")
612        );
613        assert_eq!(
614            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
615            Some("leading_only")
616        );
617
618        // No pipes
619        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
620        assert_eq!(TableUtils::determine_pipe_style(""), None);
621        assert_eq!(TableUtils::determine_pipe_style("   "), None);
622
623        // Single pipe cases
624        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
625        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
626        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
627    }
628
629    #[test]
630    fn test_find_table_blocks_simple() {
631        let content = "| Header 1 | Header 2 |
632|-----------|-----------|
633| Cell 1    | Cell 2    |
634| Cell 3    | Cell 4    |";
635
636        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
637
638        let tables = TableUtils::find_table_blocks(content, &ctx);
639        assert_eq!(tables.len(), 1);
640
641        let table = &tables[0];
642        assert_eq!(table.start_line, 0);
643        assert_eq!(table.end_line, 3);
644        assert_eq!(table.header_line, 0);
645        assert_eq!(table.delimiter_line, 1);
646        assert_eq!(table.content_lines, vec![2, 3]);
647    }
648
649    #[test]
650    fn test_find_table_blocks_multiple() {
651        let content = "Some text
652
653| Table 1 | Col A |
654|----------|-------|
655| Data 1   | Val 1 |
656
657More text
658
659| Table 2 | Col 2 |
660|----------|-------|
661| Data 2   | Data  |";
662
663        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
664
665        let tables = TableUtils::find_table_blocks(content, &ctx);
666        assert_eq!(tables.len(), 2);
667
668        // First table
669        assert_eq!(tables[0].start_line, 2);
670        assert_eq!(tables[0].end_line, 4);
671        assert_eq!(tables[0].header_line, 2);
672        assert_eq!(tables[0].delimiter_line, 3);
673        assert_eq!(tables[0].content_lines, vec![4]);
674
675        // Second table
676        assert_eq!(tables[1].start_line, 8);
677        assert_eq!(tables[1].end_line, 10);
678        assert_eq!(tables[1].header_line, 8);
679        assert_eq!(tables[1].delimiter_line, 9);
680        assert_eq!(tables[1].content_lines, vec![10]);
681    }
682
683    #[test]
684    fn test_find_table_blocks_no_content_rows() {
685        let content = "| Header 1 | Header 2 |
686|-----------|-----------|
687
688Next paragraph";
689
690        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
691
692        let tables = TableUtils::find_table_blocks(content, &ctx);
693        assert_eq!(tables.len(), 1);
694
695        let table = &tables[0];
696        assert_eq!(table.start_line, 0);
697        assert_eq!(table.end_line, 1); // Just header and delimiter
698        assert_eq!(table.content_lines.len(), 0);
699    }
700
701    #[test]
702    fn test_find_table_blocks_in_code_block() {
703        let content = "```
704| Not | A | Table |
705|-----|---|-------|
706| In  | Code | Block |
707```
708
709| Real | Table |
710|------|-------|
711| Data | Here  |";
712
713        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
714
715        let tables = TableUtils::find_table_blocks(content, &ctx);
716        assert_eq!(tables.len(), 1); // Only the table outside code block
717
718        let table = &tables[0];
719        assert_eq!(table.header_line, 6);
720        assert_eq!(table.delimiter_line, 7);
721    }
722
723    #[test]
724    fn test_find_table_blocks_no_tables() {
725        let content = "Just regular text
726No tables here
727- List item with | pipe
728* Another list item";
729
730        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
731
732        let tables = TableUtils::find_table_blocks(content, &ctx);
733        assert_eq!(tables.len(), 0);
734    }
735
736    #[test]
737    fn test_find_table_blocks_malformed() {
738        let content = "| Header without delimiter |
739| This looks like table |
740But no delimiter row
741
742| Proper | Table |
743|---------|-------|
744| Data    | Here  |";
745
746        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
747
748        let tables = TableUtils::find_table_blocks(content, &ctx);
749        assert_eq!(tables.len(), 1); // Only the proper table
750        assert_eq!(tables[0].header_line, 4);
751    }
752
753    #[test]
754    fn test_edge_cases() {
755        // Test empty content
756        assert!(!TableUtils::is_potential_table_row(""));
757        assert!(!TableUtils::is_delimiter_row(""));
758        assert_eq!(TableUtils::count_cells(""), 0);
759        assert_eq!(TableUtils::determine_pipe_style(""), None);
760
761        // Test whitespace only
762        assert!(!TableUtils::is_potential_table_row("   "));
763        assert!(!TableUtils::is_delimiter_row("   "));
764        assert_eq!(TableUtils::count_cells("   "), 0);
765        assert_eq!(TableUtils::determine_pipe_style("   "), None);
766
767        // Test single character
768        assert!(!TableUtils::is_potential_table_row("|"));
769        assert!(!TableUtils::is_delimiter_row("|"));
770        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
771
772        // Test very long lines are valid table rows (no length limit)
773        // Test both single-column and multi-column long lines
774        let long_single = format!("| {} |", "a".repeat(200));
775        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
776
777        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
778        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
779
780        // Test unicode
781        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
782        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
783        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
784    }
785
786    #[test]
787    fn test_table_block_struct() {
788        let block = TableBlock {
789            start_line: 0,
790            end_line: 5,
791            header_line: 0,
792            delimiter_line: 1,
793            content_lines: vec![2, 3, 4, 5],
794        };
795
796        // Test Debug trait
797        let debug_str = format!("{block:?}");
798        assert!(debug_str.contains("TableBlock"));
799        assert!(debug_str.contains("start_line: 0"));
800
801        // Test Clone trait
802        let cloned = block.clone();
803        assert_eq!(cloned.start_line, block.start_line);
804        assert_eq!(cloned.end_line, block.end_line);
805        assert_eq!(cloned.header_line, block.header_line);
806        assert_eq!(cloned.delimiter_line, block.delimiter_line);
807        assert_eq!(cloned.content_lines, block.content_lines);
808    }
809
810    #[test]
811    fn test_split_table_row() {
812        // Basic split
813        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2 | Cell 3 |");
814        assert_eq!(cells.len(), 3);
815        assert_eq!(cells[0].trim(), "Cell 1");
816        assert_eq!(cells[1].trim(), "Cell 2");
817        assert_eq!(cells[2].trim(), "Cell 3");
818
819        // Without trailing pipe
820        let cells = TableUtils::split_table_row("| Cell 1 | Cell 2");
821        assert_eq!(cells.len(), 2);
822
823        // Empty cells
824        let cells = TableUtils::split_table_row("| | | |");
825        assert_eq!(cells.len(), 3);
826
827        // Single cell
828        let cells = TableUtils::split_table_row("| Cell |");
829        assert_eq!(cells.len(), 1);
830        assert_eq!(cells[0].trim(), "Cell");
831
832        // No pipes
833        let cells = TableUtils::split_table_row("No pipes here");
834        assert_eq!(cells.len(), 0);
835    }
836
837    #[test]
838    fn test_split_table_row_with_escaped_pipes() {
839        // Escaped pipes should be preserved in cell content
840        let cells = TableUtils::split_table_row(r"| A | B \| C |");
841        assert_eq!(cells.len(), 2);
842        assert!(cells[1].contains(r"\|"), "Escaped pipe should be in cell content");
843
844        // Double backslash + pipe is NOT escaped
845        let cells = TableUtils::split_table_row(r"| A | B \\| C |");
846        assert_eq!(cells.len(), 3);
847    }
848
849    #[test]
850    fn test_split_table_row_with_flavor_mkdocs() {
851        // MkDocs flavor: pipes in inline code are NOT cell delimiters
852        let cells =
853            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::MkDocs);
854        assert_eq!(cells.len(), 2);
855        assert!(
856            cells[1].contains("`x | y`"),
857            "Inline code with pipe should be single cell in MkDocs flavor"
858        );
859
860        // Multiple pipes in inline code
861        let cells =
862            TableUtils::split_table_row_with_flavor("| Type | `a | b | c` |", crate::config::MarkdownFlavor::MkDocs);
863        assert_eq!(cells.len(), 2);
864        assert!(cells[1].contains("`a | b | c`"));
865    }
866
867    #[test]
868    fn test_split_table_row_with_flavor_standard() {
869        // Standard/GFM flavor: pipes in inline code ARE cell delimiters
870        let cells =
871            TableUtils::split_table_row_with_flavor("| Type | `x | y` |", crate::config::MarkdownFlavor::Standard);
872        // In GFM, `x | y` splits into separate cells
873        assert_eq!(cells.len(), 3);
874    }
875}