rumdl_lib/utils/
table_utils.rs

//! Shared table detection and processing utilities for markdown linting rules.
//!
//! This module provides optimized table detection and processing functionality
//! that can be shared across multiple table-related rules (MD055, MD056, MD058).

/// Represents a table block in the document.
///
/// Every field is a zero-based index into the sequence produced by
/// `content.lines()` for the document the block was found in.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableBlock {
    /// Index of the first line of the table (same line as the header row).
    pub start_line: usize,
    /// Index of the last line of the table, inclusive. Equals
    /// `delimiter_line` when the table has no body rows.
    pub end_line: usize,
    /// Index of the header row.
    pub header_line: usize,
    /// Index of the delimiter row (`|---|---|`), directly below the header.
    pub delimiter_line: usize,
    /// Indices of the body rows that follow the delimiter row, in document order.
    pub content_lines: Vec<usize>,
}
14
/// Shared table detection utilities.
///
/// A unit struct used purely as a namespace: all functionality is exposed as
/// associated functions (no `self` receiver), so it is never instantiated.
pub struct TableUtils;
17
18impl TableUtils {
19    /// Check if a line looks like a potential table row
20    pub fn is_potential_table_row(line: &str) -> bool {
21        let trimmed = line.trim();
22        if trimmed.is_empty() || !trimmed.contains('|') {
23            return false;
24        }
25
26        // Skip lines that are clearly not table rows
27        if trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ ") {
28            return false;
29        }
30
31        // Skip lines that are clearly code or inline code
32        if trimmed.starts_with("`") || trimmed.contains("``") {
33            return false;
34        }
35
36        // Must have at least 2 parts when split by |
37        let parts: Vec<&str> = trimmed.split('|').collect();
38        if parts.len() < 2 {
39            return false;
40        }
41
42        // Check if it looks like a table row by having reasonable content between pipes
43        let mut valid_parts = 0;
44        let mut total_non_empty_parts = 0;
45
46        for part in &parts {
47            let part_trimmed = part.trim();
48            // Skip empty parts (from leading/trailing pipes)
49            if part_trimmed.is_empty() {
50                continue;
51            }
52            total_non_empty_parts += 1;
53
54            // Count parts that look like table cells (reasonable content, no newlines)
55            if !part_trimmed.contains('\n') {
56                valid_parts += 1;
57            }
58        }
59
60        // Check if all non-empty parts are valid (no newlines)
61        if total_non_empty_parts > 0 && valid_parts != total_non_empty_parts {
62            // Some cells contain newlines, not a valid table row
63            return false;
64        }
65
66        // GFM allows tables with all empty cells (e.g., |||)
67        // These are valid if they have proper table formatting (leading and trailing pipes)
68        if total_non_empty_parts == 0 {
69            // Empty cells are only valid with proper pipe formatting
70            return trimmed.starts_with('|') && trimmed.ends_with('|') && parts.len() >= 3;
71        }
72
73        // GFM allows single-column tables, so >= 1 valid part is enough
74        // when the line has proper table formatting (pipes)
75        if trimmed.starts_with('|') && trimmed.ends_with('|') {
76            // Properly formatted table row with pipes on both ends
77            valid_parts >= 1
78        } else {
79            // For rows without proper pipe formatting, require at least 2 cells
80            valid_parts >= 2
81        }
82    }
83
84    /// Check if a line is a table delimiter row (e.g., |---|---|)
85    pub fn is_delimiter_row(line: &str) -> bool {
86        let trimmed = line.trim();
87        if !trimmed.contains('|') || !trimmed.contains('-') {
88            return false;
89        }
90
91        // Split by pipes and check each part
92        let parts: Vec<&str> = trimmed.split('|').collect();
93        let mut valid_delimiter_parts = 0;
94        let mut total_non_empty_parts = 0;
95
96        for part in &parts {
97            let part_trimmed = part.trim();
98            if part_trimmed.is_empty() {
99                continue; // Skip empty parts from leading/trailing pipes
100            }
101
102            total_non_empty_parts += 1;
103
104            // Check if this part looks like a delimiter (contains dashes and optionally colons)
105            if part_trimmed.chars().all(|c| c == '-' || c == ':' || c.is_whitespace()) && part_trimmed.contains('-') {
106                valid_delimiter_parts += 1;
107            }
108        }
109
110        // All non-empty parts must be valid delimiters, and there must be at least one
111        total_non_empty_parts > 0 && valid_delimiter_parts == total_non_empty_parts
112    }
113
114    /// Find all table blocks in the content with optimized detection
115    /// This version accepts code_blocks and code_spans directly for use during LintContext construction
116    pub fn find_table_blocks_with_code_info(
117        content: &str,
118        code_blocks: &[(usize, usize)],
119        code_spans: &[crate::lint_context::CodeSpan],
120        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
121    ) -> Vec<TableBlock> {
122        let lines: Vec<&str> = content.lines().collect();
123        let mut tables = Vec::new();
124        let mut i = 0;
125
126        // Pre-compute line positions for efficient code block checking
127        let mut line_positions = Vec::with_capacity(lines.len());
128        let mut pos = 0;
129        for line in &lines {
130            line_positions.push(pos);
131            pos += line.len() + 1; // +1 for newline
132        }
133
134        while i < lines.len() {
135            // Skip lines in code blocks, code spans, or HTML comments
136            let line_start = line_positions[i];
137            let in_code =
138                crate::utils::code_block_utils::CodeBlockUtils::is_in_code_block_or_span(code_blocks, line_start)
139                    || code_spans
140                        .iter()
141                        .any(|span| line_start >= span.byte_offset && line_start < span.byte_end);
142            let in_html_comment = html_comment_ranges
143                .iter()
144                .any(|range| line_start >= range.start && line_start < range.end);
145
146            if in_code || in_html_comment {
147                i += 1;
148                continue;
149            }
150
151            // Look for potential table start
152            if Self::is_potential_table_row(lines[i]) {
153                // Check if the next line is a delimiter row
154                if i + 1 < lines.len() && Self::is_delimiter_row(lines[i + 1]) {
155                    // Found a table! Find its end
156                    let table_start = i;
157                    let header_line = i;
158                    let delimiter_line = i + 1;
159                    let mut table_end = i + 1; // Include the delimiter row
160                    let mut content_lines = Vec::new();
161
162                    // Continue while we have table rows
163                    let mut j = i + 2;
164                    while j < lines.len() {
165                        let line = lines[j];
166                        if line.trim().is_empty() {
167                            // Empty line ends the table
168                            break;
169                        }
170                        if Self::is_potential_table_row(line) {
171                            content_lines.push(j);
172                            table_end = j;
173                            j += 1;
174                        } else {
175                            // Non-table line ends the table
176                            break;
177                        }
178                    }
179
180                    tables.push(TableBlock {
181                        start_line: table_start,
182                        end_line: table_end,
183                        header_line,
184                        delimiter_line,
185                        content_lines,
186                    });
187                    i = table_end + 1;
188                } else {
189                    i += 1;
190                }
191            } else {
192                i += 1;
193            }
194        }
195
196        tables
197    }
198
199    /// Find all table blocks in the content with optimized detection
200    /// This is a backward-compatible wrapper that accepts LintContext
201    pub fn find_table_blocks(content: &str, ctx: &crate::lint_context::LintContext) -> Vec<TableBlock> {
202        Self::find_table_blocks_with_code_info(content, &ctx.code_blocks, &ctx.code_spans(), ctx.html_comment_ranges())
203    }
204
205    /// Count the number of cells in a table row
206    pub fn count_cells(row: &str) -> usize {
207        let trimmed = row.trim();
208
209        // Skip non-table rows
210        if !trimmed.contains('|') {
211            return 0;
212        }
213
214        // Users shouldn't have to escape pipes in regex patterns, inline code, etc.
215        let masked_row = Self::mask_pipes_for_table_parsing(trimmed);
216
217        // Handle case with leading/trailing pipes
218        let mut cell_count = 0;
219        let parts: Vec<&str> = masked_row.split('|').collect();
220
221        for (i, part) in parts.iter().enumerate() {
222            // Skip first part if it's empty and there's a leading pipe
223            if i == 0 && part.trim().is_empty() && parts.len() > 1 {
224                continue;
225            }
226
227            // Skip last part if it's empty and there's a trailing pipe
228            if i == parts.len() - 1 && part.trim().is_empty() && parts.len() > 1 {
229                continue;
230            }
231
232            cell_count += 1;
233        }
234
235        cell_count
236    }
237
238    /// Mask pipes inside inline code blocks with a placeholder character
239    pub fn mask_pipes_in_inline_code(text: &str) -> String {
240        let mut result = String::new();
241        let chars: Vec<char> = text.chars().collect();
242        let mut i = 0;
243
244        while i < chars.len() {
245            if chars[i] == '`' {
246                // Count consecutive backticks at start
247                let start = i;
248                let mut backtick_count = 0;
249                while i < chars.len() && chars[i] == '`' {
250                    backtick_count += 1;
251                    i += 1;
252                }
253
254                // Look for matching closing backticks
255                let mut found_closing = false;
256                let mut j = i;
257
258                while j < chars.len() {
259                    if chars[j] == '`' {
260                        // Count potential closing backticks
261                        let close_start = j;
262                        let mut close_count = 0;
263                        while j < chars.len() && chars[j] == '`' {
264                            close_count += 1;
265                            j += 1;
266                        }
267
268                        if close_count == backtick_count {
269                            // Found matching closing backticks
270                            found_closing = true;
271
272                            // Valid inline code - add with pipes masked
273                            result.extend(chars[start..i].iter());
274
275                            for &ch in chars.iter().take(close_start).skip(i) {
276                                if ch == '|' {
277                                    result.push('_'); // Mask pipe with underscore
278                                } else {
279                                    result.push(ch);
280                                }
281                            }
282
283                            result.extend(chars[close_start..j].iter());
284                            i = j;
285                            break;
286                        }
287                        // If not matching, continue searching (j is already past these backticks)
288                    } else {
289                        j += 1;
290                    }
291                }
292
293                if !found_closing {
294                    // No matching closing found, treat as regular text
295                    result.extend(chars[start..i].iter());
296                }
297            } else {
298                result.push(chars[i]);
299                i += 1;
300            }
301        }
302
303        result
304    }
305
306    /// Mask both inline code pipes AND escaped pipes for accurate table cell parsing
307    ///
308    /// This function combines two types of masking:
309    /// 1. Pipes inside inline code blocks (between backticks) → masked as '_'
310    /// 2. Escaped pipes `\|` → masked as `\_` (backslash + underscore)
311    ///
312    /// This allows `split('|')` to correctly identify cell boundaries without
313    /// accidentally splitting on:
314    /// - Literal pipes inside code: `| a | b |` → treated as single cell
315    /// - Escaped pipes: `a \| b` → treated as single cell containing literal pipe
316    ///
317    /// The original text is reconstructed from byte offsets, so these masks only
318    /// affect where we split, not the actual cell content.
319    pub fn mask_pipes_for_table_parsing(text: &str) -> String {
320        // First pass: mask inline code pipes
321        let after_code_masking = Self::mask_pipes_in_inline_code(text);
322
323        // Second pass: mask escaped pipes
324        let mut result = String::new();
325        let chars: Vec<char> = after_code_masking.chars().collect();
326        let mut i = 0;
327
328        while i < chars.len() {
329            if i + 1 < chars.len() && chars[i] == '\\' && chars[i + 1] == '|' {
330                // Found escaped pipe: \|
331                // Replace with \_ to keep same byte length while preventing split
332                result.push('\\');
333                result.push('_'); // Mask the pipe
334                i += 2;
335            } else {
336                result.push(chars[i]);
337                i += 1;
338            }
339        }
340
341        result
342    }
343
344    /// Determine the pipe style of a table row
345    pub fn determine_pipe_style(line: &str) -> Option<&'static str> {
346        let trimmed = line.trim();
347        if !trimmed.contains('|') {
348            return None;
349        }
350
351        let has_leading = trimmed.starts_with('|');
352        let has_trailing = trimmed.ends_with('|');
353
354        match (has_leading, has_trailing) {
355            (true, true) => Some("leading_and_trailing"),
356            (true, false) => Some("leading_only"),
357            (false, true) => Some("trailing_only"),
358            (false, false) => Some("no_leading_or_trailing"),
359        }
360    }
361}
362
363#[cfg(test)]
364mod tests {
365    use super::*;
366    use crate::lint_context::LintContext;
367
368    #[test]
369    fn test_is_potential_table_row() {
370        // Basic valid table rows
371        assert!(TableUtils::is_potential_table_row("| Header 1 | Header 2 |"));
372        assert!(TableUtils::is_potential_table_row("| Cell 1 | Cell 2 |"));
373        assert!(TableUtils::is_potential_table_row("Cell 1 | Cell 2"));
374        assert!(TableUtils::is_potential_table_row("| Cell |")); // Single-column tables are valid in GFM
375
376        // Multiple cells
377        assert!(TableUtils::is_potential_table_row("| A | B | C | D | E |"));
378
379        // With whitespace
380        assert!(TableUtils::is_potential_table_row("  | Indented | Table |  "));
381        assert!(TableUtils::is_potential_table_row("| Spaces | Around |"));
382
383        // Not table rows
384        assert!(!TableUtils::is_potential_table_row("- List item"));
385        assert!(!TableUtils::is_potential_table_row("* Another list"));
386        assert!(!TableUtils::is_potential_table_row("+ Plus list"));
387        assert!(!TableUtils::is_potential_table_row("Regular text"));
388        assert!(!TableUtils::is_potential_table_row(""));
389        assert!(!TableUtils::is_potential_table_row("   "));
390
391        // Code blocks
392        assert!(!TableUtils::is_potential_table_row("`code with | pipe`"));
393        assert!(!TableUtils::is_potential_table_row("``multiple | backticks``"));
394
395        // Single pipe not enough
396        assert!(!TableUtils::is_potential_table_row("Just one |"));
397        assert!(!TableUtils::is_potential_table_row("| Just one"));
398
399        // Very long cells are valid in tables (no length limit for cell content)
400        let long_cell = "a".repeat(150);
401        assert!(TableUtils::is_potential_table_row(&format!("| {long_cell} | b |")));
402
403        // Cells with newlines
404        assert!(!TableUtils::is_potential_table_row("| Cell with\nnewline | Other |"));
405
406        // Empty cells (Issue #129)
407        assert!(TableUtils::is_potential_table_row("|||")); // Two empty cells
408        assert!(TableUtils::is_potential_table_row("||||")); // Three empty cells
409        assert!(TableUtils::is_potential_table_row("| | |")); // Two empty cells with spaces
410    }
411
412    #[test]
413    fn test_is_delimiter_row() {
414        // Basic delimiter rows
415        assert!(TableUtils::is_delimiter_row("|---|---|"));
416        assert!(TableUtils::is_delimiter_row("| --- | --- |"));
417        assert!(TableUtils::is_delimiter_row("|:---|---:|"));
418        assert!(TableUtils::is_delimiter_row("|:---:|:---:|"));
419
420        // With varying dash counts
421        assert!(TableUtils::is_delimiter_row("|-|--|"));
422        assert!(TableUtils::is_delimiter_row("|-------|----------|"));
423
424        // With whitespace
425        assert!(TableUtils::is_delimiter_row("|  ---  |  ---  |"));
426        assert!(TableUtils::is_delimiter_row("| :--- | ---: |"));
427
428        // Multiple columns
429        assert!(TableUtils::is_delimiter_row("|---|---|---|---|"));
430
431        // Without leading/trailing pipes
432        assert!(TableUtils::is_delimiter_row("--- | ---"));
433        assert!(TableUtils::is_delimiter_row(":--- | ---:"));
434
435        // Not delimiter rows
436        assert!(!TableUtils::is_delimiter_row("| Header | Header |"));
437        assert!(!TableUtils::is_delimiter_row("Regular text"));
438        assert!(!TableUtils::is_delimiter_row(""));
439        assert!(!TableUtils::is_delimiter_row("|||"));
440        assert!(!TableUtils::is_delimiter_row("| | |"));
441
442        // Must have dashes
443        assert!(!TableUtils::is_delimiter_row("| : | : |"));
444        assert!(!TableUtils::is_delimiter_row("|    |    |"));
445
446        // Mixed content
447        assert!(!TableUtils::is_delimiter_row("| --- | text |"));
448        assert!(!TableUtils::is_delimiter_row("| abc | --- |"));
449    }
450
451    #[test]
452    fn test_count_cells() {
453        // Basic counts
454        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2 | Cell 3 |"), 3);
455        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 | Cell 3"), 3);
456        assert_eq!(TableUtils::count_cells("| Cell 1 | Cell 2"), 2);
457        assert_eq!(TableUtils::count_cells("Cell 1 | Cell 2 |"), 2);
458
459        // Single cell
460        assert_eq!(TableUtils::count_cells("| Cell |"), 1);
461        assert_eq!(TableUtils::count_cells("Cell"), 0); // No pipe
462
463        // Empty cells
464        assert_eq!(TableUtils::count_cells("|  |  |  |"), 3);
465        assert_eq!(TableUtils::count_cells("| | | |"), 3);
466
467        // Many cells
468        assert_eq!(TableUtils::count_cells("| A | B | C | D | E | F |"), 6);
469
470        // Edge cases
471        assert_eq!(TableUtils::count_cells("||"), 1); // One empty cell
472        assert_eq!(TableUtils::count_cells("|||"), 2); // Two empty cells
473
474        // No table
475        assert_eq!(TableUtils::count_cells("Regular text"), 0);
476        assert_eq!(TableUtils::count_cells(""), 0);
477        assert_eq!(TableUtils::count_cells("   "), 0);
478
479        // Whitespace handling
480        assert_eq!(TableUtils::count_cells("  | A | B |  "), 2);
481        assert_eq!(TableUtils::count_cells("|   A   |   B   |"), 2);
482    }
483
484    #[test]
485    fn test_count_cells_with_inline_code() {
486        // Test the user's actual example from Issue #34
487        assert_eq!(TableUtils::count_cells("| Challenge | Solution |"), 2);
488        assert_eq!(
489            TableUtils::count_cells("| Hour:minute:second formats | `^([0-1]?\\d|2[0-3]):[0-5]\\d:[0-5]\\d$` |"),
490            2
491        );
492
493        // Test basic inline code with pipes
494        assert_eq!(TableUtils::count_cells("| Command | `echo | grep` |"), 2);
495        assert_eq!(TableUtils::count_cells("| A | `code | with | pipes` | B |"), 3);
496
497        // Test escaped pipes (correct GFM)
498        assert_eq!(TableUtils::count_cells("| Command | `echo \\| grep` |"), 2);
499
500        // Test multiple inline code blocks
501        assert_eq!(TableUtils::count_cells("| `code | one` | `code | two` |"), 2);
502
503        // Test edge cases
504        assert_eq!(TableUtils::count_cells("| Empty inline | `` | cell |"), 3);
505        assert_eq!(TableUtils::count_cells("| `single|pipe` |"), 1);
506
507        // Test that basic table structure still works
508        assert_eq!(TableUtils::count_cells("| A | B | C |"), 3);
509        assert_eq!(TableUtils::count_cells("| One | Two |"), 2);
510    }
511
512    #[test]
513    fn test_determine_pipe_style() {
514        // All pipe styles
515        assert_eq!(
516            TableUtils::determine_pipe_style("| Cell 1 | Cell 2 |"),
517            Some("leading_and_trailing")
518        );
519        assert_eq!(
520            TableUtils::determine_pipe_style("| Cell 1 | Cell 2"),
521            Some("leading_only")
522        );
523        assert_eq!(
524            TableUtils::determine_pipe_style("Cell 1 | Cell 2 |"),
525            Some("trailing_only")
526        );
527        assert_eq!(
528            TableUtils::determine_pipe_style("Cell 1 | Cell 2"),
529            Some("no_leading_or_trailing")
530        );
531
532        // With whitespace
533        assert_eq!(
534            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2 |  "),
535            Some("leading_and_trailing")
536        );
537        assert_eq!(
538            TableUtils::determine_pipe_style("  | Cell 1 | Cell 2  "),
539            Some("leading_only")
540        );
541
542        // No pipes
543        assert_eq!(TableUtils::determine_pipe_style("Regular text"), None);
544        assert_eq!(TableUtils::determine_pipe_style(""), None);
545        assert_eq!(TableUtils::determine_pipe_style("   "), None);
546
547        // Single pipe cases
548        assert_eq!(TableUtils::determine_pipe_style("|"), Some("leading_and_trailing"));
549        assert_eq!(TableUtils::determine_pipe_style("| Cell"), Some("leading_only"));
550        assert_eq!(TableUtils::determine_pipe_style("Cell |"), Some("trailing_only"));
551    }
552
553    #[test]
554    fn test_find_table_blocks_simple() {
555        let content = "| Header 1 | Header 2 |
556|-----------|-----------|
557| Cell 1    | Cell 2    |
558| Cell 3    | Cell 4    |";
559
560        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
561
562        let tables = TableUtils::find_table_blocks(content, &ctx);
563        assert_eq!(tables.len(), 1);
564
565        let table = &tables[0];
566        assert_eq!(table.start_line, 0);
567        assert_eq!(table.end_line, 3);
568        assert_eq!(table.header_line, 0);
569        assert_eq!(table.delimiter_line, 1);
570        assert_eq!(table.content_lines, vec![2, 3]);
571    }
572
573    #[test]
574    fn test_find_table_blocks_multiple() {
575        let content = "Some text
576
577| Table 1 | Col A |
578|----------|-------|
579| Data 1   | Val 1 |
580
581More text
582
583| Table 2 | Col 2 |
584|----------|-------|
585| Data 2   | Data  |";
586
587        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
588
589        let tables = TableUtils::find_table_blocks(content, &ctx);
590        assert_eq!(tables.len(), 2);
591
592        // First table
593        assert_eq!(tables[0].start_line, 2);
594        assert_eq!(tables[0].end_line, 4);
595        assert_eq!(tables[0].header_line, 2);
596        assert_eq!(tables[0].delimiter_line, 3);
597        assert_eq!(tables[0].content_lines, vec![4]);
598
599        // Second table
600        assert_eq!(tables[1].start_line, 8);
601        assert_eq!(tables[1].end_line, 10);
602        assert_eq!(tables[1].header_line, 8);
603        assert_eq!(tables[1].delimiter_line, 9);
604        assert_eq!(tables[1].content_lines, vec![10]);
605    }
606
607    #[test]
608    fn test_find_table_blocks_no_content_rows() {
609        let content = "| Header 1 | Header 2 |
610|-----------|-----------|
611
612Next paragraph";
613
614        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
615
616        let tables = TableUtils::find_table_blocks(content, &ctx);
617        assert_eq!(tables.len(), 1);
618
619        let table = &tables[0];
620        assert_eq!(table.start_line, 0);
621        assert_eq!(table.end_line, 1); // Just header and delimiter
622        assert_eq!(table.content_lines.len(), 0);
623    }
624
625    #[test]
626    fn test_find_table_blocks_in_code_block() {
627        let content = "```
628| Not | A | Table |
629|-----|---|-------|
630| In  | Code | Block |
631```
632
633| Real | Table |
634|------|-------|
635| Data | Here  |";
636
637        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
638
639        let tables = TableUtils::find_table_blocks(content, &ctx);
640        assert_eq!(tables.len(), 1); // Only the table outside code block
641
642        let table = &tables[0];
643        assert_eq!(table.header_line, 6);
644        assert_eq!(table.delimiter_line, 7);
645    }
646
647    #[test]
648    fn test_find_table_blocks_no_tables() {
649        let content = "Just regular text
650No tables here
651- List item with | pipe
652* Another list item";
653
654        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
655
656        let tables = TableUtils::find_table_blocks(content, &ctx);
657        assert_eq!(tables.len(), 0);
658    }
659
660    #[test]
661    fn test_find_table_blocks_malformed() {
662        let content = "| Header without delimiter |
663| This looks like table |
664But no delimiter row
665
666| Proper | Table |
667|---------|-------|
668| Data    | Here  |";
669
670        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
671
672        let tables = TableUtils::find_table_blocks(content, &ctx);
673        assert_eq!(tables.len(), 1); // Only the proper table
674        assert_eq!(tables[0].header_line, 4);
675    }
676
677    #[test]
678    fn test_edge_cases() {
679        // Test empty content
680        assert!(!TableUtils::is_potential_table_row(""));
681        assert!(!TableUtils::is_delimiter_row(""));
682        assert_eq!(TableUtils::count_cells(""), 0);
683        assert_eq!(TableUtils::determine_pipe_style(""), None);
684
685        // Test whitespace only
686        assert!(!TableUtils::is_potential_table_row("   "));
687        assert!(!TableUtils::is_delimiter_row("   "));
688        assert_eq!(TableUtils::count_cells("   "), 0);
689        assert_eq!(TableUtils::determine_pipe_style("   "), None);
690
691        // Test single character
692        assert!(!TableUtils::is_potential_table_row("|"));
693        assert!(!TableUtils::is_delimiter_row("|"));
694        assert_eq!(TableUtils::count_cells("|"), 0); // Need at least 2 parts
695
696        // Test very long lines are valid table rows (no length limit)
697        // Test both single-column and multi-column long lines
698        let long_single = format!("| {} |", "a".repeat(200));
699        assert!(TableUtils::is_potential_table_row(&long_single)); // Single-column table with long content
700
701        let long_multi = format!("| {} | {} |", "a".repeat(200), "b".repeat(200));
702        assert!(TableUtils::is_potential_table_row(&long_multi)); // Multi-column table with long content
703
704        // Test unicode
705        assert!(TableUtils::is_potential_table_row("| 你好 | 世界 |"));
706        assert!(TableUtils::is_potential_table_row("| émoji | 🎉 |"));
707        assert_eq!(TableUtils::count_cells("| 你好 | 世界 |"), 2);
708    }
709
710    #[test]
711    fn test_table_block_struct() {
712        let block = TableBlock {
713            start_line: 0,
714            end_line: 5,
715            header_line: 0,
716            delimiter_line: 1,
717            content_lines: vec![2, 3, 4, 5],
718        };
719
720        // Test Debug trait
721        let debug_str = format!("{block:?}");
722        assert!(debug_str.contains("TableBlock"));
723        assert!(debug_str.contains("start_line: 0"));
724
725        // Test Clone trait
726        let cloned = block.clone();
727        assert_eq!(cloned.start_line, block.start_line);
728        assert_eq!(cloned.end_line, block.end_line);
729        assert_eq!(cloned.header_line, block.header_line);
730        assert_eq!(cloned.delimiter_line, block.delimiter_line);
731        assert_eq!(cloned.content_lines, block.content_lines);
732    }
733}