rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod ast_utils;
7pub mod code_block_utils;
8// DocumentStructure has been merged into LintContext
9// pub mod document_structure;
10pub mod early_returns;
11pub mod element_cache;
12pub mod emphasis_utils;
13pub mod fix_utils;
14pub mod header_id_utils;
15pub mod jinja_utils;
16pub mod kramdown_utils;
17pub mod line_ending;
18pub mod markdown_elements;
19pub mod mkdocs_admonitions;
20pub mod mkdocs_common;
21pub mod mkdocs_critic;
22pub mod mkdocs_footnotes;
23pub mod mkdocs_patterns;
24pub mod mkdocs_snippets;
25pub mod mkdocs_tabs;
26pub mod mkdocs_test_utils;
27pub mod mkdocstrings_refs;
28pub mod range_utils;
29pub mod regex_cache;
30pub mod skip_context;
31pub mod string_interner;
32pub mod table_utils;
33pub mod text_reflow;
34
35pub use ast_utils::AstCache;
36pub use code_block_utils::CodeBlockUtils;
37// pub use document_structure::DocumentStructure;
38pub use line_ending::{
39    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
40    normalize_line_ending,
41};
42pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
43pub use range_utils::LineIndex;
44
/// Splits `s` into its content and a single trailing line terminator.
///
/// Recognizes `"\r\n"` and `"\n"` (checked in that order so a CRLF pair is kept together).
/// When `s` ends with neither, the terminator is the empty string.
fn split_trailing_newline(s: &str) -> (&str, &str) {
    if let Some(content) = s.strip_suffix("\r\n") {
        (content, "\r\n")
    } else if let Some(content) = s.strip_suffix('\n') {
        (content, "\n")
    } else {
        (s, "")
    }
}

/// String extensions for inspecting and rewriting trailing spaces.
///
/// All methods look only at ASCII space characters (`' '`). Tabs and other
/// whitespace are treated as ordinary content and end the run of trailing spaces.
/// A single line terminator (`"\n"` or `"\r\n"`) at the very end of the string is
/// ignored when locating the trailing spaces.
pub trait StrExt {
    /// Replace the run of trailing spaces with `replacement`.
    ///
    /// The replacement is inserted even when there are no trailing spaces, and a
    /// final line terminator (`"\n"` or `"\r\n"`), if present, is preserved after it.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Check whether the string has at least one trailing space
    /// (spaces only; tabs do not count).
    fn has_trailing_spaces(&self) -> bool;

    /// Count the trailing spaces, ignoring one final line terminator.
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set the line terminator aside so it can be re-attached after the replacement.
        let (content, line_ending) = split_trailing_newline(self);

        // Only plain spaces are stripped; a tab (or any other character) stops the trim.
        let kept = content.trim_end_matches(' ');

        let mut result = String::with_capacity(kept.len() + replacement.len() + line_ending.len());
        result.push_str(kept);
        result.push_str(replacement);
        result.push_str(line_ending);
        result
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() > 0
    }

    fn trailing_spaces(&self) -> usize {
        let (content, _) = split_trailing_newline(self);

        // A space is exactly one byte in UTF-8, so the byte-length difference
        // equals the number of trailing space characters.
        content.len() - content.trim_end_matches(' ').len()
    }
}
113
114use std::collections::hash_map::DefaultHasher;
115use std::hash::{Hash, Hasher};
116
/// Hash string content into a 64-bit value.
///
/// Intended as a quick key for caching mechanisms. The value is produced by
/// feeding `content` through the standard library's [`DefaultHasher`].
///
/// Per the standard library documentation, the algorithm behind `DefaultHasher`
/// is unspecified and may change between Rust releases, so the returned value
/// should not be persisted or compared across builds.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    state.finish()
}
134
#[cfg(test)]
mod tests {
    use super::*;

    // Expected return values of `detect_line_ending`.
    const LF: &str = "\n";
    const CRLF: &str = "\r\n";

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Every line ends with LF.
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Every line ends with CRLF.
        assert_eq!(detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"), CRLF);
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Mixed endings: three LF against one CRLF.
        let input = "First line\nSecond line\r\nThird line\nFourth line\n";
        assert_eq!(detect_line_ending(input), LF);
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Mixed endings: three CRLF against one LF.
        let input = "First line\r\nSecond line\r\nThird line\nFourth line\r\n";
        assert_eq!(detect_line_ending(input), CRLF);
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // Nothing to inspect, so LF is expected as the default.
        assert_eq!(detect_line_ending(""), LF);
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // A single unterminated line is also expected to yield the LF default.
        let input = "This is a single line with no line ending";
        assert_eq!(detect_line_ending(input), LF);
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Tie between CRLF and LF (two each). The test expects LF to win the tie,
        // i.e. CRLF must be strictly more frequent to be selected.
        let input = "Line 1\r\nLine 2\nLine 3\r\nLine 4\n";
        assert_eq!(detect_line_ending(input), LF);
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // One line, terminated by LF.
        assert_eq!(detect_line_ending("Line 1\n"), LF);
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // One line, terminated by CRLF.
        assert_eq!(detect_line_ending("Line 1\r\n"), CRLF);
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A lone CR (not followed by LF) must not be counted as a CRLF.
        // The input holds one CRLF and two bare LF, so LF is expected.
        let input = "Line 1\rLine 2\nLine 3\r\nLine 4\n";
        assert_eq!(detect_line_ending(input), LF);
    }
}