oxihuman_core/
whitespace_normalizer.rs1#![allow(dead_code)]
4
/// Line terminator conventions the normalizer can emit.
#[derive(Debug, Clone, PartialEq)]
pub enum LineEnding {
    /// Line feed (`"\n"`).
    Lf,
    /// Carriage return followed by line feed (`"\r\n"`).
    CrLf,
    /// Carriage return alone (`"\r"`).
    Cr,
}

impl LineEnding {
    /// Returns the literal character sequence this terminator stands for.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Cr => "\r",
            Self::CrLf => "\r\n",
            Self::Lf => "\n",
        }
    }
}
27
28#[derive(Debug, Clone)]
30pub struct NormalizerConfig {
31 pub target_ending: LineEnding,
32 pub trim_trailing: bool,
33 pub max_blank_lines: usize,
34 pub ensure_final_newline: bool,
35}
36
37impl Default for NormalizerConfig {
38 fn default() -> Self {
39 Self {
40 target_ending: LineEnding::Lf,
41 trim_trailing: true,
42 max_blank_lines: 2,
43 ensure_final_newline: true,
44 }
45 }
46}
47
/// Tallies of whitespace problems found in a piece of text.
#[derive(Debug, Clone, Default)]
pub struct WhitespaceStats {
    /// Number of `"\r\n"` sequences seen.
    pub crlf_count: usize,
    /// Number of lines that end in whitespace.
    pub trailing_whitespace_lines: usize,
    /// Number of blank lines beyond the permitted run length.
    pub excess_blank_lines: usize,
    /// Set when a final line terminator was expected but absent.
    pub missing_final_newline: bool,
}

impl WhitespaceStats {
    /// Returns `true` when any counter is non-zero or the final newline
    /// is flagged as missing.
    pub fn has_issues(&self) -> bool {
        let counters = [
            self.crlf_count,
            self.trailing_whitespace_lines,
            self.excess_blank_lines,
        ];
        self.missing_final_newline || counters.iter().any(|&n| n > 0)
    }
}
65
66pub fn detect_issues(text: &str, cfg: &NormalizerConfig) -> WhitespaceStats {
68 let mut stats = WhitespaceStats::default();
69 let mut blank_run = 0usize;
70
71 stats.crlf_count = text.matches("\r\n").count();
72
73 for line in text.lines() {
74 if line.is_empty() {
75 blank_run += 1;
76 if blank_run > cfg.max_blank_lines {
77 stats.excess_blank_lines += 1;
78 }
79 } else {
80 blank_run = 0;
81 if line != line.trim_end() {
82 stats.trailing_whitespace_lines += 1;
83 }
84 }
85 }
86
87 stats.missing_final_newline =
88 cfg.ensure_final_newline && !text.ends_with('\n') && !text.ends_with("\r\n");
89 stats
90}
91
92pub fn normalize(text: &str, cfg: &NormalizerConfig) -> String {
94 let unified = text.replace("\r\n", "\n").replace('\r', "\n");
96 let ending = cfg.target_ending.as_str();
97
98 let mut out = String::with_capacity(unified.len());
99 let mut blank_run = 0usize;
100
101 for line in unified.lines() {
102 let processed = if cfg.trim_trailing {
103 line.trim_end()
104 } else {
105 line
106 };
107 if processed.is_empty() {
108 blank_run += 1;
109 if blank_run <= cfg.max_blank_lines {
110 out.push_str(ending);
111 }
112 } else {
113 blank_run = 0;
114 out.push_str(processed);
115 out.push_str(ending);
116 }
117 }
118
119 if cfg.ensure_final_newline && !out.ends_with(ending) {
121 out.push_str(ending);
122 }
123
124 out
125}
126
/// Returns how many bytes of whitespace sit at the end of `line`.
///
/// The result is a byte length, so a multi-byte whitespace character
/// contributes more than one.
pub fn trailing_whitespace_count(line: &str) -> usize {
    let kept = line.trim_end().len();
    // `trim_end` yields a prefix of `line`, so `kept` never exceeds `line.len()`.
    line.len() - kept
}
131
/// Removes trailing whitespace from every line of `text`.
///
/// Lines are joined with `'\n'` and the result always ends with a single
/// `'\n'`, even when `text` is empty.
pub fn strip_trailing(text: &str) -> String {
    let mut cleaned = String::with_capacity(text.len() + 1);
    for (idx, row) in text.lines().enumerate() {
        if idx > 0 {
            cleaned.push('\n');
        }
        cleaned.push_str(row.trim_end());
    }
    cleaned.push('\n');
    cleaned
}
140
/// Limits every run of consecutive blank lines in `text` to at most `max`.
///
/// A line counts as blank when it is empty or contains only whitespace;
/// surviving blank lines are written as a bare `'\n'`. Every emitted line,
/// including the last, is terminated with `'\n'`.
pub fn collapse_blank_lines(text: &str, max: usize) -> String {
    let mut result = String::new();
    let mut run_len = 0usize;

    for row in text.lines() {
        let is_blank = row.trim().is_empty();
        if !is_blank {
            run_len = 0;
            result.push_str(row);
            result.push('\n');
            continue;
        }

        run_len += 1;
        if run_len <= max {
            result.push('\n');
        }
    }

    result
}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Both CRLF sequences in the input are counted.
    #[test]
    fn test_detect_crlf() {
        let cfg = NormalizerConfig::default();
        let stats = detect_issues("line1\r\nline2\r\n", &cfg);
        assert_eq!(stats.crlf_count, 2);
    }

    /// Only the line that ends in a space is reported.
    #[test]
    fn test_detect_trailing_whitespace() {
        let cfg = NormalizerConfig::default();
        let stats = detect_issues("line \nclean\n", &cfg);
        assert_eq!(stats.trailing_whitespace_lines, 1);
    }

    /// With the default LF target no carriage return survives.
    #[test]
    fn test_normalize_crlf_to_lf() {
        let cfg = NormalizerConfig::default();
        let result = normalize("a\r\nb\r\n", &cfg);
        assert!(!result.contains('\r'));
    }

    /// Default configuration trims trailing whitespace.
    #[test]
    fn test_normalize_trim_trailing() {
        let cfg = NormalizerConfig::default();
        let result = normalize("hello \n", &cfg);
        assert_eq!(result, "hello\n");
    }

    /// Collapsing removes at least one newline from an over-long blank run.
    #[test]
    fn test_collapse_blank_lines() {
        let text = "a\n\n\n\nb\n";
        let result = collapse_blank_lines(text, 1);
        assert!(result.matches('\n').count() < text.matches('\n').count());
    }

    /// Leading whitespace is kept, trailing whitespace is dropped.
    #[test]
    fn test_strip_trailing() {
        let result = strip_trailing(" hi \n");
        assert_eq!(result, " hi\n");
    }

    /// Three trailing spaces are counted as three; a clean line as zero.
    #[test]
    fn test_trailing_whitespace_count() {
        // The input must carry exactly three trailing spaces to match the
        // expected count.
        assert_eq!(trailing_whitespace_count("abc   "), 3);
        assert_eq!(trailing_whitespace_count("abc"), 0);
    }

    /// A single clean, newline-terminated line raises no issue.
    #[test]
    fn test_has_issues_false_for_clean() {
        let cfg = NormalizerConfig::default();
        let stats = detect_issues("clean line\n", &cfg);
        assert!(!stats.has_issues());
    }

    /// Every variant maps to its literal terminator.
    #[test]
    fn test_line_ending_as_str() {
        assert_eq!(LineEnding::Lf.as_str(), "\n");
        assert_eq!(LineEnding::CrLf.as_str(), "\r\n");
        assert_eq!(LineEnding::Cr.as_str(), "\r");
    }
}