rumdl_lib/utils/
early_returns.rs

//!
//! Fast-path checks and early-return utilities for rule implementations in rumdl.
//! Provides helpers to quickly skip rules based on cheap content analysis.
4
5use crate::rule::LintResult;
6
7/// Trait for implementing early returns in rules
8pub trait EarlyReturns {
9    /// Check if this rule can be skipped based on content analysis
10    fn can_skip(&self, content: &str) -> bool;
11
12    /// Returns the empty result if the rule can be skipped
13    fn early_return_if_skippable(&self, content: &str) -> Option<LintResult> {
14        if self.can_skip(content) {
15            Some(Ok(Vec::new()))
16        } else {
17            None
18        }
19    }
20}
21
22/// Common early return checks for heading-related rules
23pub fn should_skip_heading_rule(content: &str) -> bool {
24    content.is_empty() || !content.contains('#')
25}
26
/// Common early return check for list-related rules.
///
/// Returns `true` when `content` contains none of the characters that could introduce a list:
/// a bullet character (`*`, `-`, `+`) or an ordered-list delimiter followed by a space
/// (`. ` or `) `). Both ordered delimiters are accepted because CommonMark allows `1.` as well
/// as `1)` markers. Empty content is always skippable.
pub fn should_skip_list_rule(content: &str) -> bool {
    let has_bullet_char = content.contains(|c: char| matches!(c, '*' | '-' | '+'));
    let has_ordered_delimiter = content.contains(". ") || content.contains(") ");

    !(has_bullet_char || has_ordered_delimiter)
}
32
/// Common early return check for code block related rules.
///
/// Returns `true` when `content` has neither a fence marker (three backticks or `~~~`) nor
/// anything that could indent a line into a code block: a run of four spaces or a tab
/// (CommonMark treats a leading tab like four spaces of indentation).
/// Inline code alone does not prevent skipping. Empty content is always skippable.
pub fn should_skip_code_block_rule(content: &str) -> bool {
    let has_fence = content.contains("```") || content.contains("~~~");
    let has_code_indent = content.contains("    ") || content.contains('\t');

    !(has_fence || has_code_indent)
}
37
/// Common early return check for link-related rules.
///
/// Content is skippable when it contains no `[`, no `(` and no `]:`; empty content
/// therefore always qualifies.
pub fn should_skip_link_rule(content: &str) -> bool {
    let may_contain_link = content.contains('[') || content.contains('(') || content.contains("]:");
    !may_contain_link
}
42
/// Common early return check for inline HTML rules.
///
/// Content is skippable unless it contains both a `<` and a `>` somewhere; a lone angle
/// bracket (for example in `a < b`) is not enough to keep the rule running.
pub fn should_skip_html_rule(content: &str) -> bool {
    let has_both_brackets = content.contains('<') && content.contains('>');
    !has_both_brackets
}
47
/// Common early return check for emphasis-related rules.
///
/// Content is skippable when it contains neither `*` nor `_`.
pub fn should_skip_emphasis_rule(content: &str) -> bool {
    !content.contains(|c: char| matches!(c, '*' | '_'))
}
52
/// Common early return check for image-related rules.
///
/// Content is skippable when the image opener `![` never occurs in it.
pub fn should_skip_image_rule(content: &str) -> bool {
    // An empty string trivially lacks the marker, so no separate emptiness check is needed.
    !content.contains("![")
}
57
/// Common early return check for whitespace-related rules.
///
/// Only completely empty content is skippable; any other input, including content that is
/// made up solely of whitespace, still has to be checked.
pub fn should_skip_whitespace_rule(content: &str) -> bool {
    matches!(content, "")
}
62
/// Common early return check for blockquote-related rules.
///
/// Content is skippable when it contains no `>` character at all, wherever on a line it
/// might appear.
pub fn should_skip_blockquote_rule(content: &str) -> bool {
    // `>` is ASCII, so a byte search is equivalent to a character search on UTF-8 text.
    !content.as_bytes().contains(&b'>')
}
67
// Early return utilities for performance optimization.
// These functions provide fast content analysis to skip expensive processing.

/// Check if content has any URLs (http, https, ftp).
///
/// Returns `true` when `content` contains one of the scheme prefixes `http://`, `https://`
/// or `ftp://` anywhere. The match is a case-sensitive substring search on the scheme only,
/// so whatever follows it (including Unicode / internationalized host names) does not affect
/// the result.
#[inline]
pub fn has_urls(content: &str) -> bool {
    const SCHEME_PREFIXES: [&str; 3] = ["http://", "https://", "ftp://"];

    SCHEME_PREFIXES.iter().any(|prefix| content.contains(*prefix))
}
92
93/// Check if content has any headings (ATX or Setext)
94#[inline]
95pub fn has_headings(content: &str) -> bool {
96    content.contains('#') || has_setext_headings(content)
97}
98
/// Check if content has Setext headings (underlines).
///
/// A line qualifies when, after trimming surrounding whitespace, it is at least two
/// characters long and consists solely of `=` or solely of `-`.
#[inline]
pub fn has_setext_headings(content: &str) -> bool {
    content.lines().map(str::trim).any(|candidate| {
        candidate.len() > 1
            && ['=', '-']
                .iter()
                .any(|&marker| candidate.chars().all(|c| c == marker))
    })
}
110
111/// Check if content has any list markers
112#[inline]
113pub fn has_lists(content: &str) -> bool {
114    content.contains("* ") || content.contains("- ") || content.contains("+ ") || has_ordered_lists(content)
115}
116
/// Check if content has ordered lists.
///
/// A line counts as a possible ordered list item when its first non-whitespace character is
/// an ASCII digit and it contains an ordered-list delimiter followed by a space. Both
/// delimiters allowed by CommonMark are recognized: `. ` (as in `1. item`) and `) `
/// (as in `1) item`).
///
/// This is a fast heuristic: it may report `true` for ordinary prose that happens to start
/// with a digit, which only costs the caller a skipped fast path.
#[inline]
pub fn has_ordered_lists(content: &str) -> bool {
    content.lines().any(|line| {
        let candidate = line.trim_start();
        candidate.starts_with(|c: char| c.is_ascii_digit())
            && (candidate.contains(". ") || candidate.contains(") "))
    })
}
131
/// Check if content has any links or images.
///
/// Requires an opening `[` plus either `](` (inline link or image) or `]:` (reference
/// definition) somewhere in the content; bare brackets alone do not count.
#[inline]
pub fn has_links_or_images(content: &str) -> bool {
    if !content.contains('[') {
        return false;
    }
    content.contains("](") || content.contains("]:")
}
137
/// Check if content has any code blocks or inline code.
///
/// Looks for a backtick (inline code or a backtick fence) or a `~~~` fence anywhere in the
/// content.
#[inline]
pub fn has_code(content: &str) -> bool {
    ["`", "~~~"].iter().any(|marker| content.contains(*marker))
}
143
/// Check if content has any emphasis markers.
///
/// Any `*` or `_` character anywhere in the content counts.
#[inline]
pub fn has_emphasis(content: &str) -> bool {
    content.chars().any(|c| matches!(c, '*' | '_'))
}
149
/// Check if content has any HTML tags.
///
/// Both a `<` and a `>` must occur somewhere in the content; their order and position are
/// not checked.
#[inline]
pub fn has_html(content: &str) -> bool {
    let has_open = content.contains('<');
    let has_close = content.contains('>');
    has_open && has_close
}
155
/// Check if content has any blockquotes.
///
/// A line counts when its first non-whitespace character is `>`.
#[inline]
pub fn has_blockquotes(content: &str) -> bool {
    content
        .lines()
        .any(|line| line.trim_start().starts_with('>'))
}
166
/// Check if content has any tables.
///
/// Any `|` character anywhere in the content counts as a potential table.
#[inline]
pub fn has_tables(content: &str) -> bool {
    // `|` is ASCII, so a byte search is equivalent to a character search on UTF-8 text.
    content.as_bytes().contains(&b'|')
}
172
/// Check if content has trailing spaces.
///
/// Returns `true` when any line (with its line terminator already removed by
/// [`str::lines`]) ends in a space or a tab.
#[inline]
pub fn has_trailing_spaces(content: &str) -> bool {
    // Space and tab are ASCII, so inspecting the final byte of each line is sufficient.
    content
        .lines()
        .any(|line| matches!(line.as_bytes().last(), Some(b' ' | b'\t')))
}
183
/// Check if content has hard tabs.
///
/// Returns `true` when a tab character occurs anywhere in the content.
#[inline]
pub fn has_hard_tabs(content: &str) -> bool {
    // Tab is ASCII, so a byte search is equivalent to a character search on UTF-8 text.
    content.as_bytes().contains(&b'\t')
}
189
/// Check if content has long lines (over threshold).
///
/// A line is long when its length in bytes ([`str::len`]) is strictly greater than
/// `threshold`; a line of exactly `threshold` bytes is not reported.
#[inline]
pub fn has_long_lines(content: &str, threshold: usize) -> bool {
    content.lines().any(|line| line.len() > threshold)
}
200
/// Comprehensive content analysis for rule filtering.
///
/// Every `has_*` flag records whether at least one line matched a cheap textual heuristic.
/// A `true` flag therefore means "possibly present", not "definitely present".
#[derive(Debug, Default)]
pub struct ContentAnalysis {
    /// A trimmed line starts with `#`, or consists only of two or more `=` / `-` characters.
    pub has_headings: bool,
    /// A line contains `* `, `- ` or `+ `, or starts (after leading whitespace) with an ASCII
    /// digit and contains `. ` or `) `.
    pub has_lists: bool,
    /// A line contains `[` together with `](` or `]:`.
    pub has_links: bool,
    /// A line contains a backtick or `~~~`.
    pub has_code: bool,
    /// A line contains `*` or `_`.
    pub has_emphasis: bool,
    /// A line contains both `<` and `>`.
    pub has_html: bool,
    /// A line's first non-whitespace character is `>`.
    pub has_blockquotes: bool,
    /// A line contains `|`.
    pub has_tables: bool,
    /// A line ends with a space or a tab.
    pub has_trailing_spaces: bool,
    /// A line contains a tab character.
    pub has_hard_tabs: bool,
    /// A line's byte length exceeds the threshold passed to [`ContentAnalysis::analyze`].
    pub has_long_lines: bool,
    /// Number of lines, as split by [`str::lines`].
    pub line_count: usize,
    /// Length of the content in bytes ([`str::len`]); this equals the number of characters
    /// only for ASCII-only content.
    pub char_count: usize,
}

impl ContentAnalysis {
    /// Perform comprehensive analysis of content.
    ///
    /// Walks the lines of `content` exactly once, setting each flag the first time a line
    /// matches its heuristic and counting lines along the way. `line_length_threshold` is the
    /// byte length a line must exceed for `has_long_lines` to be set.
    pub fn analyze(content: &str, line_length_threshold: usize) -> Self {
        let mut analysis = Self {
            char_count: content.len(),
            ..Default::default()
        };

        for line in content.lines() {
            // Counted inside the loop so the content is split into lines only once.
            analysis.line_count += 1;

            let trimmed = line.trim();
            let trimmed_start = line.trim_start();

            // `flag = flag || check` short-circuits, so a heuristic stops being evaluated as
            // soon as its flag is set.
            analysis.has_headings =
                analysis.has_headings || trimmed.starts_with('#') || Self::is_setext_underline(trimmed);

            analysis.has_lists = analysis.has_lists || Self::looks_like_list_item(line, trimmed_start);

            analysis.has_links =
                analysis.has_links || (line.contains('[') && (line.contains("](") || line.contains("]:")));

            analysis.has_code = analysis.has_code || line.contains('`') || line.contains("~~~");

            analysis.has_emphasis = analysis.has_emphasis || line.contains('*') || line.contains('_');

            analysis.has_html = analysis.has_html || (line.contains('<') && line.contains('>'));

            analysis.has_blockquotes = analysis.has_blockquotes || trimmed_start.starts_with('>');

            analysis.has_tables = analysis.has_tables || line.contains('|');

            analysis.has_trailing_spaces =
                analysis.has_trailing_spaces || line.ends_with(' ') || line.ends_with('\t');

            analysis.has_hard_tabs = analysis.has_hard_tabs || line.contains('\t');

            analysis.has_long_lines = analysis.has_long_lines || line.len() > line_length_threshold;
        }

        analysis
    }

    /// Returns `true` when `trimmed` is two or more characters made up solely of `=` or
    /// solely of `-`.
    fn is_setext_underline(trimmed: &str) -> bool {
        trimmed.len() > 1 && (trimmed.chars().all(|c| c == '=') || trimmed.chars().all(|c| c == '-'))
    }

    /// Returns `true` when `line` contains a bullet marker followed by a space, or starts
    /// (per `trimmed_start`) with an ASCII digit and contains an ordered-list delimiter
    /// (`.` or `)`, both valid in CommonMark) followed by a space.
    fn looks_like_list_item(line: &str, trimmed_start: &str) -> bool {
        if line.contains("* ") || line.contains("- ") || line.contains("+ ") {
            return true;
        }

        trimmed_start.starts_with(|c: char| c.is_ascii_digit())
            && (line.contains(". ") || line.contains(") "))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `predicate` returns `true` for every input.
    fn assert_all(predicate: fn(&str) -> bool, inputs: &[&str]) {
        for input in inputs {
            assert!(predicate(input), "expected true for {:?}", input);
        }
    }

    /// Asserts that `predicate` returns `false` for every input.
    fn assert_none(predicate: fn(&str) -> bool, inputs: &[&str]) {
        for input in inputs {
            assert!(!predicate(input), "expected false for {:?}", input);
        }
    }

    #[test]
    fn test_should_skip_heading_rule() {
        // Empty content and content without headings are skipped.
        assert_all(
            should_skip_heading_rule,
            &["", "Just plain text", "Some text\nMore text"],
        );

        // Content with ATX headings is not skipped.
        assert_none(
            should_skip_heading_rule,
            &["# Heading", "Text before\n## Heading 2", "###Heading without space"],
        );
    }

    #[test]
    fn test_should_skip_list_rule() {
        // Empty content and content without lists are skipped.
        assert_all(should_skip_list_rule, &["", "Just plain text", "# Heading\nParagraph"]);

        // Unordered and ordered lists are not skipped.
        assert_none(
            should_skip_list_rule,
            &["* Item", "- Item", "+ Item", "1. Item", "99. Item"],
        );
    }

    #[test]
    fn test_should_skip_code_block_rule() {
        // Empty content and content without code blocks are skipped.
        assert_all(should_skip_code_block_rule, &["", "Just plain text", "# Heading"]);

        // Fenced and indented code blocks are not skipped.
        assert_none(
            should_skip_code_block_rule,
            &["```rust\ncode\n```", "~~~\ncode\n~~~", "    indented code"],
        );
    }

    #[test]
    fn test_should_skip_link_rule() {
        assert_all(should_skip_link_rule, &["", "Just plain text"]);

        assert_none(
            should_skip_link_rule,
            &["[link](url)", "[ref]: url", "Text with [link]", "Text with (parentheses)"],
        );
    }

    #[test]
    fn test_should_skip_html_rule() {
        // A lone `<` or `>` is not enough to keep the rule running.
        assert_all(should_skip_html_rule, &["", "Just plain text", "a < b", "a > b"]);

        assert_none(
            should_skip_html_rule,
            &["<div>content</div>", "Text with <span>tag</span>"],
        );
    }

    #[test]
    fn test_should_skip_emphasis_rule() {
        assert_all(should_skip_emphasis_rule, &["", "Just plain text"]);

        assert_none(
            should_skip_emphasis_rule,
            &["*emphasis*", "_emphasis_", "Text with * marker"],
        );
    }

    #[test]
    fn test_should_skip_image_rule() {
        // A plain link is not an image.
        assert_all(should_skip_image_rule, &["", "Just plain text", "[link](url)"]);

        assert_none(should_skip_image_rule, &["![alt](image.png)", "Text with ![image]"]);
    }

    #[test]
    fn test_should_skip_whitespace_rule() {
        // Only completely empty content is skipped.
        assert_all(should_skip_whitespace_rule, &[""]);
        assert_none(should_skip_whitespace_rule, &[" ", "\n", "text"]);
    }

    #[test]
    fn test_should_skip_blockquote_rule() {
        assert_all(should_skip_blockquote_rule, &["", "Just plain text"]);

        assert_none(should_skip_blockquote_rule, &["> Quote", "Text\n> Quote"]);
    }

    #[test]
    fn test_has_urls() {
        assert_none(has_urls, &["", "Just plain text"]);

        assert_all(
            has_urls,
            &[
                "http://example.com",
                "https://example.com",
                "ftp://example.com",
                "Text with https://link.com in it",
            ],
        );

        // Unicode/internationalized URLs
        assert_all(
            has_urls,
            &[
                "https://例え.jp",
                "http://münchen.de",
                "https://🌐.ws",
                "Visit https://español.example.com for more",
            ],
        );
    }

    #[test]
    fn test_has_headings() {
        assert_none(has_headings, &["", "Just plain text"]);

        // ATX headings
        assert_all(has_headings, &["# Heading", "## Heading 2"]);

        // Setext headings
        assert_all(has_headings, &["Heading\n======", "Heading\n------"]);
    }

    #[test]
    fn test_has_setext_headings() {
        assert_none(has_setext_headings, &["", "Just plain text", "# ATX heading"]);

        // Valid setext headings
        assert_all(
            has_setext_headings,
            &["Heading\n======", "Heading\n------", "Heading\n===", "Heading\n---"],
        );

        // Not setext headings: a single marker or markers mixed with text
        assert_none(has_setext_headings, &["=", "-", "a = b"]);
    }

    #[test]
    fn test_has_lists() {
        assert_none(has_lists, &["", "Just plain text"]);

        // Unordered lists
        assert_all(has_lists, &["* Item", "- Item", "+ Item"]);

        // Ordered lists
        assert_all(has_lists, &["1. Item", "99. Item"]);

        // Not lists - these don't have the required space after the marker
        assert_none(has_lists, &["*emphasis*", "a-b"]);

        // A spaced dash contains "- ", which this heuristic cannot tell apart from a bullet.
        assert_all(has_lists, &["a - b"]);
    }

    #[test]
    fn test_has_ordered_lists() {
        assert_none(has_ordered_lists, &["", "Just plain text", "* Unordered"]);

        // Valid ordered lists
        assert_all(has_ordered_lists, &["1. Item", "99. Item", "  2. Indented"]);

        // Not ordered lists: no space after the period, or no leading digit
        assert_none(has_ordered_lists, &["1.Item", "a. Item"]);
    }

    #[test]
    fn test_has_links_or_images() {
        assert_none(has_links_or_images, &["", "Just plain text"]);

        // Links and images
        assert_all(has_links_or_images, &["[link](url)", "[ref]: url", "![alt](img)"]);

        // Just brackets are not enough
        assert_none(has_links_or_images, &["[text]", "array[index]"]);
    }

    #[test]
    fn test_has_code() {
        assert_none(has_code, &["", "Just plain text"]);

        // Inline code
        assert_all(has_code, &["`code`", "Text with `code` inline"]);

        // Fenced code blocks
        assert_all(has_code, &["```rust\ncode\n```", "~~~\ncode\n~~~"]);
    }

    #[test]
    fn test_has_emphasis() {
        assert_none(has_emphasis, &["", "Just plain text"]);

        assert_all(has_emphasis, &["*emphasis*", "_emphasis_", "**bold**", "__bold__"]);
    }

    #[test]
    fn test_has_html() {
        assert_none(has_html, &["", "Just plain text", "a < b", "a > b"]);

        assert_all(has_html, &["<div>", "</div>", "<br/>", "<span>text</span>"]);
    }

    #[test]
    fn test_has_blockquotes() {
        assert_none(has_blockquotes, &["", "Just plain text", "a > b"]);

        assert_all(
            has_blockquotes,
            &["> Quote", "  > Indented quote", "Text\n> Quote"],
        );
    }

    #[test]
    fn test_has_tables() {
        assert_none(has_tables, &["", "Just plain text"]);

        assert_all(has_tables, &["| Header |", "a | b | c", "Text with | pipe"]);
    }

    #[test]
    fn test_has_trailing_spaces() {
        assert_none(has_trailing_spaces, &["", "Clean text", "Line 1\nLine 2"]);

        assert_all(
            has_trailing_spaces,
            &["Trailing space ", "Trailing tab\t", "Line 1\nLine with space \nLine 3"],
        );
    }

    #[test]
    fn test_has_hard_tabs() {
        assert_none(has_hard_tabs, &["", "No tabs here", "    Four spaces"]);

        assert_all(
            has_hard_tabs,
            &["\tTab at start", "Tab\tin middle", "Tab at end\t"],
        );
    }

    #[test]
    fn test_has_long_lines() {
        assert!(!has_long_lines("", 80));
        assert!(!has_long_lines("Short line", 80));
        assert!(!has_long_lines("Line 1\nLine 2", 80));

        // The threshold is exclusive: only lines strictly longer than it are reported.
        let long_line = "a".repeat(100);
        assert!(has_long_lines(&long_line, 80));
        assert!(!has_long_lines(&long_line, 100));
        assert!(!has_long_lines(&long_line, 101));
    }

    #[test]
    fn test_early_returns_trait() {
        struct TestRule;

        impl EarlyReturns for TestRule {
            fn can_skip(&self, content: &str) -> bool {
                content.is_empty()
            }
        }

        let rule = TestRule;

        // Should return early for empty content
        let result = rule.early_return_if_skippable("");
        assert!(result.is_some());
        assert!(result.unwrap().unwrap().is_empty());

        // Should not return early for non-empty content
        let result = rule.early_return_if_skippable("content");
        assert!(result.is_none());
    }

    #[test]
    fn test_content_analysis_default() {
        let analysis = ContentAnalysis::default();
        assert!(!analysis.has_headings);
        assert!(!analysis.has_lists);
        assert_eq!(analysis.line_count, 0);
        assert_eq!(analysis.char_count, 0);
    }

    #[test]
    fn test_content_analysis_analyze() {
        // Plain text sets no flags but is still counted.
        let plain = ContentAnalysis::analyze("plain text", 80);
        assert!(!plain.has_headings);
        assert!(!plain.has_lists);
        assert!(!plain.has_links);
        assert!(!plain.has_code);
        assert!(!plain.has_emphasis);
        assert!(!plain.has_html);
        assert!(!plain.has_blockquotes);
        assert!(!plain.has_tables);
        assert!(!plain.has_trailing_spaces);
        assert!(!plain.has_hard_tabs);
        assert!(!plain.has_long_lines);
        assert_eq!(plain.line_count, 1);
        assert_eq!(plain.char_count, 10);

        // Content that exercises every heuristic at least once.
        let content = "# Heading\n* item\n> quote\n`code` [link](url) <b> | x\t\n";
        let rich = ContentAnalysis::analyze(content, 10);
        assert!(rich.has_headings);
        assert!(rich.has_lists);
        assert!(rich.has_links);
        assert!(rich.has_code);
        assert!(rich.has_emphasis);
        assert!(rich.has_html);
        assert!(rich.has_blockquotes);
        assert!(rich.has_tables);
        assert!(rich.has_trailing_spaces);
        assert!(rich.has_hard_tabs);
        assert!(rich.has_long_lines);
        assert_eq!(rich.line_count, 4);
        assert_eq!(rich.char_count, content.len());

        // Setext underlines count as headings too.
        assert!(ContentAnalysis::analyze("Heading\n======", 80).has_headings);

        // Empty content yields the default analysis values.
        let empty = ContentAnalysis::analyze("", 80);
        assert_eq!(empty.line_count, 0);
        assert_eq!(empty.char_count, 0);
        assert!(!empty.has_long_lines);
    }

    #[test]
    fn test_unicode_handling() {
        // Unicode content
        assert!(!should_skip_heading_rule("# 你好"));
        assert!(!should_skip_emphasis_rule("*émphasis*"));
        // has_urls supports Unicode domains
        assert!(has_urls("https://example.com"));
        assert!(has_urls("https://例え.jp"));

        // Emoji
        assert!(!should_skip_list_rule("* 🎉 Item"));
        assert!(has_emphasis("Text with 🌟 *emphasis*"));
    }

    #[test]
    fn test_edge_cases() {
        // Empty lines
        assert!(!has_headings("\n\n\n"));
        assert!(!has_lists("\n\n\n"));

        // Whitespace only
        assert!(!has_blockquotes("   \n   \n"));
        assert!(!has_code("   \n   \n"));

        // Mixed content
        let mixed = "# Heading\n* List\n> Quote\n`code`\n[link](url)";
        assert!(!should_skip_heading_rule(mixed));
        assert!(!should_skip_list_rule(mixed));
        assert!(!should_skip_blockquote_rule(mixed));
        // should_skip_code_block_rule looks for code blocks, not inline code, and the mixed
        // content only has inline code, so the rule is skipped.
        assert!(should_skip_code_block_rule(mixed));
        assert!(!should_skip_link_rule(mixed));
    }
}
691}