rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod ast_utils;
7pub mod code_block_utils;
8pub mod document_structure;
9pub mod early_returns;
10pub mod element_cache;
11pub mod emphasis_utils;
12pub mod fix_utils;
13pub mod header_id_utils;
14pub mod kramdown_utils;
15pub mod markdown_elements;
16pub mod mkdocs_admonitions;
17pub mod mkdocs_common;
18pub mod mkdocs_critic;
19pub mod mkdocs_footnotes;
20pub mod mkdocs_patterns;
21pub mod mkdocs_snippets;
22pub mod mkdocs_tabs;
23pub mod mkdocs_test_utils;
24pub mod mkdocstrings_refs;
25pub mod range_utils;
26pub mod regex_cache;
27pub mod skip_context;
28pub mod string_interner;
29pub mod table_utils;
30pub mod text_reflow;
31
32pub use ast_utils::AstCache;
33pub use code_block_utils::CodeBlockUtils;
34pub use document_structure::DocumentStructure;
35pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
36
/// Determine which line terminator dominates `content`.
///
/// Every `\n` is classified either as the tail of a `\r\n` pair (CRLF) or as a
/// bare line feed (LF). `"\r\n"` is returned only when CRLF terminators strictly
/// outnumber bare LF ones; a tie, or content with no line break at all, yields
/// `"\n"`. A `\r` that is not immediately followed by `\n` is not counted.
pub fn detect_line_ending(content: &str) -> &'static str {
    let mut crlf_total = 0usize;
    let mut bare_lf_total = 0usize;
    let mut previous = 0u8;

    // `\r` and `\n` are ASCII and never appear inside a multi-byte UTF-8
    // sequence, so a byte scan classifies exactly the same terminators as a
    // string search would.
    for &byte in content.as_bytes() {
        if byte == b'\n' {
            if previous == b'\r' {
                crlf_total += 1;
            } else {
                bare_lf_total += 1;
            }
        }
        previous = byte;
    }

    if bare_lf_total >= crlf_total { "\n" } else { "\r\n" }
}
44pub use range_utils::LineIndex;
45
/// Extension methods for inspecting and rewriting the trailing spaces of a string.
///
/// Every method looks at the run of space characters (`' '`) that ends the
/// string, after setting aside one final `'\n'` if there is one. Only the space
/// character counts: a tab or any other character ends the run.
pub trait StrExt {
    /// Return a copy of the string with its trailing run of spaces replaced by
    /// `replacement`.
    ///
    /// `replacement` is appended even when there are no trailing spaces. If the
    /// string ends with `'\n'`, that newline is kept as the last character of
    /// the result, after `replacement`.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Report whether the string ends with at least one space (ignoring one
    /// final `'\n'`).
    fn has_trailing_spaces(&self) -> bool;

    /// Return the number of spaces that end the string (ignoring one final
    /// `'\n'`).
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set aside at most one terminating newline so it can be re-attached
        // after the replacement text.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(without_newline) => (without_newline, "\n"),
            None => (self, ""),
        };

        // Everything before the trailing run of spaces; tabs are left alone.
        let kept = body.trim_end_matches(' ');

        let mut rebuilt = String::with_capacity(kept.len() + replacement.len() + newline.len());
        rebuilt.push_str(kept);
        rebuilt.push_str(replacement);
        rebuilt.push_str(newline);
        rebuilt
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() != 0
    }

    fn trailing_spaces(&self) -> usize {
        let body = self.strip_suffix('\n').unwrap_or(self);

        // A space is a single byte, so the byte-length difference equals the
        // number of trailing spaces removed.
        body.len() - body.trim_end_matches(' ').len()
    }
}
114
115use std::collections::hash_map::DefaultHasher;
116use std::hash::{Hash, Hasher};
117
/// Compute a 64-bit hash of a string slice.
///
/// The content is fed through a freshly constructed [`DefaultHasher`], so equal
/// inputs produce equal hashes within one build of the program. The standard
/// library does not guarantee the algorithm across Rust releases, so the value
/// is suitable as an in-process cache key but should not be persisted.
///
/// # Arguments
///
/// * `content` - The text to hash
///
/// # Returns
///
/// The 64-bit digest produced by the hasher for `content`
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `detect_line_ending` picks `expected` for `input`.
    fn assert_detected(input: &str, expected: &str) {
        assert_eq!(detect_line_ending(input), expected);
    }

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Only LF terminators are present.
        assert_detected("First line\nSecond line\nThird line\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Only CRLF terminators are present.
        assert_detected("First line\r\nSecond line\r\nThird line\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Mixed terminators: three LF against one CRLF.
        assert_detected("First line\nSecond line\r\nThird line\nFourth line\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Mixed terminators: three CRLF against one LF.
        assert_detected("First line\r\nSecond line\r\nThird line\nFourth line\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // Nothing to count, so the LF default applies.
        assert_detected("", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // A single unterminated line also falls back to LF.
        assert_detected("This is a single line with no line ending", "\n");
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Two CRLF and two bare LF: CRLF must strictly outnumber LF to win
        // (the comparison is `>`, not `>=`), so a tie resolves to LF.
        assert_detected("Line 1\r\nLine 2\nLine 3\r\nLine 4\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // Exactly one LF terminator.
        assert_detected("Line 1\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // Exactly one CRLF terminator.
        assert_detected("Line 1\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A CR that is not followed by LF is not a terminator and must not be
        // counted: this input has one CRLF and two bare LF.
        assert_detected("Line 1\rLine 2\nLine 3\r\nLine 4\n", "\n");
    }
}