Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8// DocumentStructure has been merged into LintContext
9// pub mod document_structure;
10pub mod early_returns;
11pub mod element_cache;
12pub mod emphasis_utils;
13pub mod fix_utils;
14pub mod header_id_utils;
15pub mod jinja_utils;
16pub mod kramdown_utils;
17pub mod line_ending;
18pub mod markdown_elements;
19pub mod mkdocs_abbreviations;
20pub mod mkdocs_admonitions;
21pub mod mkdocs_attr_list;
22pub mod mkdocs_common;
23pub mod mkdocs_critic;
24pub mod mkdocs_definition_lists;
25pub mod mkdocs_extensions;
26pub mod mkdocs_footnotes;
27pub mod mkdocs_icons;
28pub mod mkdocs_patterns;
29pub mod mkdocs_snippets;
30pub mod mkdocs_tabs;
31pub mod mkdocs_test_utils;
32pub mod mkdocstrings_refs;
33pub mod quarto_divs;
34pub mod range_utils;
35pub mod regex_cache;
36pub mod sentence_utils;
37pub mod skip_context;
38pub mod string_interner;
39pub mod table_utils;
40pub mod text_reflow;
41pub mod utf8_offsets;
42
43pub use code_block_utils::CodeBlockUtils;
44// pub use document_structure::DocumentStructure;
45pub use line_ending::{
46    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
47    normalize_line_ending,
48};
49pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
50pub use range_utils::LineIndex;
51
/// Reports whether `line` is the definition part of a definition list
/// (an Extended Markdown construct).
///
/// A definition list pairs a term with one or more definitions, each of
/// which starts with a colon:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace on `line` is ignored. The line matches when the first
/// remaining character is `:` and the character right after it is any
/// Unicode whitespace (space, tab, newline, ...). A lone `:` or a colon
/// immediately followed by a non-whitespace character does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    line.trim_start()
        .strip_prefix(':')
        .and_then(|after_colon| after_colon.chars().next())
        .is_some_and(char::is_whitespace)
}
66
/// String helpers for inspecting and rewriting trailing spaces on a line.
///
/// All three methods share the same notion of "trailing spaces":
///
/// * At most one final `'\n'` is set aside before inspecting the text.
/// * Only the ASCII space character (`' '`, U+0020) counts. Tabs and every
///   other whitespace character end the run.
/// * A `'\r'` is not treated specially, so on a `"\r\n"`-terminated line the
///   spaces in front of the `'\r'` are *not* seen as trailing.
pub trait StrExt {
    /// Replaces the run of trailing spaces with `replacement`.
    ///
    /// A single final `'\n'`, if present, is kept at the very end of the
    /// returned string. `replacement` is inserted even when the text has no
    /// trailing spaces at all (e.g. `"abc"` with `"X"` yields `"abcX"`).
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Returns `true` when the string has at least one trailing space.
    ///
    /// Equivalent to `self.trailing_spaces() > 0`; trailing tabs alone do
    /// not count.
    fn has_trailing_spaces(&self) -> bool;

    /// Returns the number of trailing spaces in the string.
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set one final newline aside so it can be re-attached after the
        // replacement text.
        let (body, newline) = match self.strip_suffix('\n') {
            Some(without_newline) => (without_newline, "\n"),
            None => (self, ""),
        };

        // Strip spaces only; a tab or any other character stops the trim.
        let kept = body.trim_end_matches(' ');

        let mut result = String::with_capacity(kept.len() + replacement.len() + newline.len());
        result.push_str(kept);
        result.push_str(replacement);
        result.push_str(newline);
        result
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() > 0
    }

    fn trailing_spaces(&self) -> usize {
        let body = self.strip_suffix('\n').unwrap_or(self);

        // A space is exactly one byte in UTF-8, so the difference in byte
        // length equals the number of spaces removed.
        body.len() - body.trim_end_matches(' ').len()
    }
}
135
136use std::collections::hash_map::DefaultHasher;
137use std::hash::{Hash, Hasher};
138
/// Computes a 64-bit hash of `content` for use as a cache key.
///
/// The value comes from the standard library's [`DefaultHasher`], created
/// with `DefaultHasher::new()`, so equal inputs give equal hashes within one
/// build of the program. The standard library does not specify the hashing
/// algorithm and may change it between Rust releases, so these values should
/// not be persisted or compared across differently-built binaries.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// The 64-bit hash of `content`
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
156
#[cfg(test)]
mod tests {
    //! Checks for the re-exported `detect_line_ending` helper: each case
    //! feeds it a small document and asserts which line-ending string it
    //! reports.

    use super::*;

    #[test]
    fn test_detect_line_ending_pure_lf() {
        // Every line is terminated by a bare LF.
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), "\n");
    }

    #[test]
    fn test_detect_line_ending_pure_crlf() {
        // Every line is terminated by CRLF.
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"),
            "\r\n"
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        // Three bare LFs against one CRLF: LF is the majority.
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            "\n"
        );
    }

    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        // Three CRLFs against one bare LF: CRLF is the majority.
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            "\r\n"
        );
    }

    #[test]
    fn test_detect_line_ending_empty_string() {
        // With no input at all the expected answer is LF.
        assert_eq!(detect_line_ending(""), "\n");
    }

    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        // A single unterminated line contains no line endings; LF is expected.
        assert_eq!(detect_line_ending("This is a single line with no line ending"), "\n");
    }

    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        // Tie: two CRLFs and two bare LFs. LF is expected to win.
        // NOTE(review): presumably because the implementation only picks CRLF
        // on a strict majority (`>` rather than `>=`) - confirm in `line_ending`.
        assert_eq!(detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"), "\n");
    }

    #[test]
    fn test_detect_line_ending_single_lf() {
        // Exactly one line ending, a bare LF.
        assert_eq!(detect_line_ending("Line 1\n"), "\n");
    }

    #[test]
    fn test_detect_line_ending_single_crlf() {
        // Exactly one line ending, a CRLF.
        assert_eq!(detect_line_ending("Line 1\r\n"), "\r\n");
    }

    #[test]
    fn test_detect_line_ending_embedded_cr() {
        // The input has a lone CR (not followed by LF), one CRLF and two bare
        // LFs. The lone CR must not tip the result toward CRLF: LF is expected.
        assert_eq!(detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"), "\n");
    }
}