rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod ast_utils;
7pub mod code_block_utils;
8pub mod document_structure;
9pub mod early_returns;
10pub mod element_cache;
11pub mod emphasis_utils;
12pub mod fix_utils;
13pub mod header_id_utils;
14pub mod kramdown_utils;
15pub mod markdown_elements;
16pub mod range_utils;
17pub mod regex_cache;
18pub mod skip_context;
19pub mod string_interner;
20pub mod table_utils;
21pub mod text_reflow;
22
23pub use ast_utils::AstCache;
24pub use code_block_utils::CodeBlockUtils;
25pub use document_structure::DocumentStructure;
26pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
27
/// Report which line terminator dominates `content`.
///
/// Every `\n` is classified as either part of a `\r\n` pair or as a bare LF.
/// `"\r\n"` is returned only when CRLF pairs strictly outnumber bare LFs;
/// ties, empty input, and input without any newline all yield `"\n"`.
/// A lone `\r` that is not followed by `\n` is not counted as a terminator.
pub fn detect_line_ending(content: &str) -> &'static str {
    let mut crlf_pairs = 0usize;
    let mut bare_lfs = 0usize;
    let mut previous = 0u8;

    // Single pass: a newline belongs to a CRLF pair exactly when the byte
    // immediately before it is a carriage return.
    for &byte in content.as_bytes() {
        if byte == b'\n' {
            if previous == b'\r' {
                crlf_pairs += 1;
            } else {
                bare_lfs += 1;
            }
        }
        previous = byte;
    }

    if crlf_pairs > bare_lfs {
        "\r\n"
    } else {
        "\n"
    }
}
35pub use range_utils::LineIndex;
36
/// String extensions for inspecting and rewriting trailing spaces on a line.
///
/// All methods look at a single optional line terminator at the very end of
/// the string (`"\r\n"` or `"\n"`) and treat the run of ASCII space
/// characters (U+0020) immediately before it as the "trailing spaces".
/// Tabs and other whitespace are never counted as trailing spaces.
pub trait StrExt {
    /// Replace the trailing spaces with `replacement`.
    ///
    /// The trailing run of spaces is removed and `replacement` is inserted in
    /// its place (it is inserted even when there were no trailing spaces).
    /// A final `"\n"` or `"\r\n"` terminator, if present, is preserved after
    /// the replacement.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Check whether the string has at least one trailing space.
    ///
    /// Only ASCII spaces count; a line ending in a tab returns `false`.
    fn has_trailing_spaces(&self) -> bool;

    /// Count the trailing spaces, ignoring one final line terminator.
    fn trailing_spaces(&self) -> usize;
}

/// Split `line` into its body and its final line terminator.
///
/// The terminator is `"\r\n"`, `"\n"`, or `""` when the string does not end
/// with a newline. At most one terminator is split off.
fn split_line_terminator(line: &str) -> (&str, &str) {
    // Try CRLF first so the `\r` is not left behind on the body, where it
    // would hide the spaces that precede it.
    let body = line
        .strip_suffix("\r\n")
        .or_else(|| line.strip_suffix('\n'))
        .unwrap_or(line);
    line.split_at(body.len())
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        let (body, terminator) = split_line_terminator(self);
        // Only spaces are trimmed; tabs and other whitespace stay in place.
        let kept = body.trim_end_matches(' ');

        let mut result = String::with_capacity(kept.len() + replacement.len() + terminator.len());
        result.push_str(kept);
        result.push_str(replacement);
        result.push_str(terminator);
        result
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() > 0
    }

    fn trailing_spaces(&self) -> usize {
        let (body, _) = split_line_terminator(self);
        // A space is exactly one byte in UTF-8, so the byte-length difference
        // equals the number of spaces trimmed.
        body.len() - body.trim_end_matches(' ').len()
    }
}
105
106use std::collections::hash_map::DefaultHasher;
107use std::hash::{Hash, Hasher};
108
/// Compute a 64-bit hash of a string slice.
///
/// Intended as a cheap content fingerprint for caching. The string is fed
/// through its standard [`Hash`] implementation into a freshly created
/// [`DefaultHasher`], and the hasher's final state is returned.
///
/// # Arguments
///
/// * `content` - The text to fingerprint
///
/// # Returns
///
/// The `u64` produced by the hasher after consuming `content`
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    // Go through `Hash for str` (not a raw byte write) so the result matches
    // what `content.hash(..)` produces.
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
126
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `detect_line_ending` reports `expected` for `input`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn expect_ending(input: &str, expected: &str) {
        assert_eq!(detect_line_ending(input), expected);
    }

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Every line ends with a bare LF.
        expect_ending("First line\nSecond line\nThird line\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Every line ends with CRLF.
        expect_ending("First line\r\nSecond line\r\nThird line\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Mixed terminators: three bare LFs against one CRLF.
        expect_ending("First line\nSecond line\r\nThird line\nFourth line\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Mixed terminators: three CRLFs against one bare LF.
        expect_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // No content at all falls back to LF.
        expect_ending("", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // A single unterminated line falls back to LF.
        expect_ending("This is a single line with no line ending", "\n");
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Two CRLFs and two bare LFs: the comparison is strict (`>`),
        // so a tie resolves to LF.
        expect_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // Exactly one bare LF.
        expect_ending("Line 1\n", "\n");
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // Exactly one CRLF.
        expect_ending("Line 1\r\n", "\r\n");
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // A lone CR that is not followed by LF must not be counted.
        // The input holds one CRLF and two bare LFs, so LF wins.
        expect_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n", "\n");
    }
}