rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod ast_utils;
7pub mod code_block_utils;
8pub mod document_structure;
9pub mod early_returns;
10pub mod element_cache;
11pub mod emphasis_utils;
12pub mod fix_utils;
13pub mod header_id_utils;
14pub mod kramdown_utils;
15pub mod line_ending;
16pub mod markdown_elements;
17pub mod mkdocs_admonitions;
18pub mod mkdocs_common;
19pub mod mkdocs_critic;
20pub mod mkdocs_footnotes;
21pub mod mkdocs_patterns;
22pub mod mkdocs_snippets;
23pub mod mkdocs_tabs;
24pub mod mkdocs_test_utils;
25pub mod mkdocstrings_refs;
26pub mod range_utils;
27pub mod regex_cache;
28pub mod skip_context;
29pub mod string_interner;
30pub mod table_utils;
31pub mod text_reflow;
32
33pub use ast_utils::AstCache;
34pub use code_block_utils::CodeBlockUtils;
35pub use document_structure::DocumentStructure;
36pub use line_ending::{
37    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
38    normalize_line_ending,
39};
40pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
41pub use range_utils::LineIndex;
42
/// String extensions for inspecting and rewriting the trailing spaces of a line.
///
/// All methods share the same notion of "trailing spaces":
///
/// * At most one final `\n` is set aside before looking at the end of the string.
/// * Only the ASCII space character (`' '`) counts. Tabs, `\r` and every other
///   whitespace character terminate the run, so a line ending in `" \t"` or
///   `" \r\n"` has zero trailing spaces.
pub trait StrExt {
    /// Return a copy of the string with its run of trailing spaces replaced by
    /// `replacement`.
    ///
    /// If the string ends with `\n`, that newline is kept at the very end of the
    /// result, after `replacement`. When there are no trailing spaces,
    /// `replacement` is still inserted at that position.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Return `true` if the string has at least one trailing space
    /// (spaces only, not arbitrary whitespace).
    fn has_trailing_spaces(&self) -> bool;

    /// Return the number of trailing spaces in the string.
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set a single final newline aside so it can be re-appended after the replacement.
        let (content, newline) = match self.strip_suffix('\n') {
            Some(stripped) => (stripped, "\n"),
            None => (self, ""),
        };

        // Everything up to (but excluding) the trailing run of spaces.
        let kept = content.trim_end_matches(' ');

        let mut result = String::with_capacity(kept.len() + replacement.len() + newline.len());
        result.push_str(kept);
        result.push_str(replacement);
        result.push_str(newline);
        result
    }

    fn has_trailing_spaces(&self) -> bool {
        // Equivalent to `trailing_spaces() > 0` without counting the whole run.
        self.strip_suffix('\n').unwrap_or(self).ends_with(' ')
    }

    fn trailing_spaces(&self) -> usize {
        let content = self.strip_suffix('\n').unwrap_or(self);

        // A space is exactly one byte in UTF-8, so the byte-length difference
        // equals the number of spaces that were trimmed.
        content.len() - content.trim_end_matches(' ').len()
    }
}
111
112use std::collections::hash_map::DefaultHasher;
113use std::hash::{Hash, Hasher};
114
/// Compute a 64-bit hash of a string slice.
///
/// The content is fed through the standard library's [`DefaultHasher`], created
/// with its default state, so equal inputs yield equal hashes within a given
/// build. The standard library does not specify the underlying algorithm and
/// may change it between Rust releases, so the value is suitable as an
/// in-memory cache key but should not be persisted or compared across builds.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// The 64-bit hash of `content`
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
132
#[cfg(test)]
mod tests {
    use super::*;

    // Every line terminated by a bare LF -> LF.
    #[test]
    fn test_detect_line_ending_pure_lf() {
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), "\n");
    }

    // Every line terminated by CRLF -> CRLF.
    #[test]
    fn test_detect_line_ending_pure_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"),
            "\r\n"
        );
    }

    // Mixed input: three bare LF versus one CRLF -> LF.
    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            "\n"
        );
    }

    // Mixed input: three CRLF versus one bare LF -> CRLF.
    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            "\r\n"
        );
    }

    // No content at all falls back to LF.
    #[test]
    fn test_detect_line_ending_empty_string() {
        assert_eq!(detect_line_ending(""), "\n");
    }

    // Text without any line terminator falls back to LF.
    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        assert_eq!(detect_line_ending("This is a single line with no line ending"), "\n");
    }

    // Two CRLF and two bare LF: the tie is expected to resolve to LF.
    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        assert_eq!(detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"), "\n");
    }

    // A lone LF terminator -> LF.
    #[test]
    fn test_detect_line_ending_single_lf() {
        assert_eq!(detect_line_ending("Line 1\n"), "\n");
    }

    // A lone CRLF terminator -> CRLF.
    #[test]
    fn test_detect_line_ending_single_crlf() {
        assert_eq!(detect_line_ending("Line 1\r\n"), "\r\n");
    }

    // A CR that is not followed by LF must not be counted as a CRLF:
    // the input has one CRLF, two bare LF and one stray CR, so LF wins.
    #[test]
    fn test_detect_line_ending_embedded_cr() {
        assert_eq!(detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"), "\n");
    }
}