Skip to main content

rumdl_lib/utils/
mod.rs

1//!
2//! Shared utilities for rumdl, including document structure analysis, code block handling, regex helpers, and string extensions.
3//! Provides reusable traits and functions for rule implementations and core linter logic.
4
5pub mod anchor_styles;
6pub mod blockquote;
7pub mod code_block_utils;
8// DocumentStructure has been merged into LintContext
9// pub mod document_structure;
10pub mod early_returns;
11pub mod element_cache;
12pub mod emphasis_utils;
13pub mod fix_utils;
14pub mod header_id_utils;
15pub mod jinja_utils;
16pub mod kramdown_utils;
17pub mod line_ending;
18pub mod markdown_elements;
19pub mod mkdocs_abbreviations;
20pub mod mkdocs_admonitions;
21pub mod mkdocs_attr_list;
22pub mod mkdocs_common;
23pub mod mkdocs_critic;
24pub mod mkdocs_definition_lists;
25pub mod mkdocs_extensions;
26pub mod mkdocs_footnotes;
27pub mod mkdocs_html_markdown;
28pub mod mkdocs_icons;
29pub mod mkdocs_patterns;
30pub mod mkdocs_snippets;
31pub mod mkdocs_tabs;
32pub mod mkdocs_test_utils;
33pub mod mkdocstrings_refs;
34pub mod quarto_divs;
35pub mod range_utils;
36pub mod regex_cache;
37pub mod sentence_utils;
38pub mod skip_context;
39pub mod string_interner;
40pub mod table_utils;
41pub mod text_reflow;
42pub mod utf8_offsets;
43
44pub use code_block_utils::CodeBlockUtils;
45// pub use document_structure::DocumentStructure;
46pub use line_ending::{
47    LineEnding, detect_line_ending, detect_line_ending_enum, ensure_consistent_line_endings, get_line_ending_str,
48    normalize_line_ending,
49};
50pub use markdown_elements::{ElementQuality, ElementType, MarkdownElement, MarkdownElements};
51pub use range_utils::LineIndex;
52
/// Report whether `line` is the definition line of a definition list (Extended Markdown).
///
/// A definition list pairs a term with one or more definitions, each introduced by a colon:
/// ```text
/// Term
/// : Definition
/// ```
///
/// Leading whitespace is ignored. The line matches when the first remaining character is `:`
/// and the character immediately after it is whitespace (space, tab, line break, ...).
/// A lone `:` with nothing after it does not match.
///
/// Supported by: PHP Markdown Extra, Kramdown, Pandoc, Hugo, and others
pub fn is_definition_list_item(line: &str) -> bool {
    let mut rest = line.trim_start().chars();
    // Both checks consume from the same iterator: first the marker, then its separator.
    rest.next() == Some(':') && rest.next().is_some_and(char::is_whitespace)
}
67
/// Extension methods for inspecting and rewriting the trailing spaces of a line.
///
/// Only U+0020 space characters count as "trailing spaces"; a tab or any other character
/// ends the scan. A single final `\n` is set aside before scanning, so the spaces examined
/// are the ones sitting directly in front of that newline (or at the very end of the string
/// when there is no newline).
pub trait StrExt {
    /// Return a copy of the string with its run of trailing spaces replaced by `replacement`.
    ///
    /// `replacement` is inserted even when there are no trailing spaces. A final `\n`, if
    /// present, is preserved after the replacement.
    fn replace_trailing_spaces(&self, replacement: &str) -> String;

    /// Return `true` when the string has at least one trailing space.
    fn has_trailing_spaces(&self) -> bool;

    /// Count the number of trailing spaces in the string.
    fn trailing_spaces(&self) -> usize;
}

impl StrExt for str {
    fn replace_trailing_spaces(&self, replacement: &str) -> String {
        // Set one final newline aside so the spaces in front of it are the ones replaced.
        let (content, newline) = match self.strip_suffix('\n') {
            Some(stripped) => (stripped, "\n"),
            None => (self, ""),
        };

        // Matching on ' ' (not general whitespace) keeps tabs and other characters intact.
        let kept = content.trim_end_matches(' ');

        let mut result = String::with_capacity(kept.len() + replacement.len() + newline.len());
        result.push_str(kept);
        result.push_str(replacement);
        result.push_str(newline);
        result
    }

    fn has_trailing_spaces(&self) -> bool {
        self.trailing_spaces() > 0
    }

    fn trailing_spaces(&self) -> usize {
        // Ignore one final newline, then measure the run of spaces at the end.
        let content = self.strip_suffix('\n').unwrap_or(self);

        // A space is exactly one byte in UTF-8, so the byte-length difference is the count.
        content.len() - content.trim_end_matches(' ').len()
    }
}
136
137use std::collections::hash_map::DefaultHasher;
138use std::hash::{Hash, Hasher};
139
/// Hash string content into a 64-bit value, e.g. for use as a cache key.
///
/// The content is fed through the standard library's `DefaultHasher` using the `Hash`
/// implementation for `str`.
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s algorithm and may
/// change it between Rust releases, so these values are presumably only suitable for
/// in-process caches, not for anything persisted - confirm against callers.
///
/// # Arguments
///
/// * `content` - The string content to hash
///
/// # Returns
///
/// A 64-bit hash value derived from the content
pub fn fast_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
157
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected answer when LF wins or when the detector falls back to its default.
    const LF: &str = "\n";
    /// Expected answer when CRLF wins.
    const CRLF: &str = "\r\n";

    /// Content terminated exclusively by LF is reported as LF.
    #[test]
    fn test_detect_line_ending_pure_lf() {
        assert_eq!(detect_line_ending("First line\nSecond line\nThird line\n"), LF);
    }

    /// Content terminated exclusively by CRLF is reported as CRLF.
    #[test]
    fn test_detect_line_ending_pure_crlf() {
        assert_eq!(detect_line_ending("First line\r\nSecond line\r\nThird line\r\n"), CRLF);
    }

    /// Mixed endings with LF in the majority resolve to LF.
    #[test]
    fn test_detect_line_ending_mixed_more_lf() {
        assert_eq!(
            detect_line_ending("First line\nSecond line\r\nThird line\nFourth line\n"),
            LF
        );
    }

    /// Mixed endings with CRLF in the majority resolve to CRLF.
    #[test]
    fn test_detect_line_ending_mixed_more_crlf() {
        assert_eq!(
            detect_line_ending("First line\r\nSecond line\r\nThird line\nFourth line\r\n"),
            CRLF
        );
    }

    /// An empty document has nothing to count, so the LF default is expected.
    #[test]
    fn test_detect_line_ending_empty_string() {
        assert_eq!(detect_line_ending(""), LF);
    }

    /// A single unterminated line also yields the LF default.
    #[test]
    fn test_detect_line_ending_single_line_no_ending() {
        assert_eq!(detect_line_ending("This is a single line with no line ending"), LF);
    }

    /// Two CRLF and two bare LF endings: a tie is expected to resolve to LF, i.e. CRLF is
    /// chosen only when it strictly outnumbers the bare LF endings.
    #[test]
    fn test_detect_line_ending_equal_lf_and_crlf() {
        assert_eq!(detect_line_ending("Line 1\r\nLine 2\nLine 3\r\nLine 4\n"), LF);
    }

    /// One LF is enough to report LF.
    #[test]
    fn test_detect_line_ending_single_lf() {
        assert_eq!(detect_line_ending("Line 1\n"), LF);
    }

    /// One CRLF is enough to report CRLF.
    #[test]
    fn test_detect_line_ending_single_crlf() {
        assert_eq!(detect_line_ending("Line 1\r\n"), CRLF);
    }

    /// A lone CR that is not followed by LF must not be counted as a CRLF.
    /// The input holds one CRLF and two bare LF endings, so LF is expected.
    #[test]
    fn test_detect_line_ending_embedded_cr() {
        assert_eq!(detect_line_ending("Line 1\rLine 2\nLine 3\r\nLine 4\n"), LF);
    }
}