ass_core/utils/utf8/
normalization.rs1use alloc::{string::String, vec::Vec};
18
/// Converts every line ending in `text` to a single line feed (`\n`).
///
/// Both Windows-style `\r\n` pairs and lone carriage returns (`\r`) are
/// rewritten to `\n`; existing `\n` characters are kept unchanged.
///
/// # Arguments
///
/// * `text` - Input text with any mix of `\r\n`, `\r` and `\n` line endings.
///
/// # Returns
///
/// A newly allocated `String` that contains no `\r` characters.
#[must_use]
pub fn normalize_line_endings(text: &str) -> String {
    let mut unified = String::with_capacity(text.len());
    let mut remaining = text.chars().peekable();

    while let Some(ch) = remaining.next() {
        match ch {
            '\r' => {
                // For a CRLF pair the CR is dropped and the LF that follows is
                // copied on the next iteration; a lone CR becomes an LF itself.
                if remaining.peek() != Some(&'\n') {
                    unified.push('\n');
                }
            }
            other => unified.push(other),
        }
    }

    unified
}
45
46#[must_use]
60pub fn normalize_whitespace(text: &str, collapse_multiple: bool) -> String {
61 let mut result = text
62 .chars()
63 .map(|c| {
64 if c.is_whitespace() && c != '\n' && c != '\t' {
65 ' ' } else {
67 c
68 }
69 })
70 .collect::<String>();
71
72 if collapse_multiple {
73 result = collapse_consecutive_spaces(&result);
74 }
75
76 result
77}
78
/// Strips control characters from `text`, keeping basic formatting ones.
///
/// A character is removed when [`char::is_control`] returns `true` for it,
/// unless it is a line feed (`'\n'`), a tab (`'\t'`) or a carriage return
/// (`'\r'`). All other characters are copied in their original order.
///
/// # Arguments
///
/// * `text` - Input text that may contain control characters.
///
/// # Returns
///
/// A newly allocated `String` without the removed characters.
#[must_use]
pub fn remove_control_chars(text: &str) -> String {
    let mut cleaned = String::with_capacity(text.len());

    for ch in text.chars() {
        let is_formatting = matches!(ch, '\n' | '\t' | '\r');
        if is_formatting || !ch.is_control() {
            cleaned.push(ch);
        }
    }

    cleaned
}
101
/// Trims leading and trailing whitespace from every line of `text`.
///
/// The input is split with [`str::lines`], each line is passed through
/// [`str::trim`], and the results are joined with `"\n"`. Because
/// `str::lines` also accepts `\r\n` terminators and does not yield an empty
/// line after a final terminator, the output uses `\n` separators only and
/// does not end with the input's trailing newline.
///
/// # Arguments
///
/// * `text` - Multi-line input text.
///
/// # Returns
///
/// A newly allocated `String` containing the trimmed lines.
#[must_use]
pub fn trim_lines(text: &str) -> String {
    let mut trimmed: Vec<&str> = Vec::new();

    for line in text.lines() {
        trimmed.push(line.trim());
    }

    trimmed.join("\n")
}
122
/// Reduces every run of consecutive ASCII spaces in `text` to one space.
///
/// Only the `' '` character is considered; tabs, newlines and all other
/// characters are copied unchanged and end the current run of spaces.
fn collapse_consecutive_spaces(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());

    for ch in text.chars() {
        // A space is redundant exactly when the output already ends in one.
        if ch == ' ' && collapsed.ends_with(' ') {
            continue;
        }
        collapsed.push(ch);
    }

    collapsed
}
145
#[cfg(test)]
mod tests {
    //! Unit tests for the text normalization helpers.
    //!
    //! Inputs that exercise collapsing or trimming deliberately contain runs
    //! of several spaces so that a no-op implementation cannot pass.

    use super::*;

    #[test]
    fn normalize_line_endings_windows() {
        let input = "Line 1\r\nLine 2\r\nLine 3";
        let normalized = normalize_line_endings(input);
        assert_eq!(normalized, "Line 1\nLine 2\nLine 3");
    }

    #[test]
    fn normalize_line_endings_mac() {
        let input = "Line 1\rLine 2\rLine 3";
        let normalized = normalize_line_endings(input);
        assert_eq!(normalized, "Line 1\nLine 2\nLine 3");
    }

    #[test]
    fn normalize_line_endings_mixed() {
        let input = "Line 1\r\nLine 2\rLine 3\n";
        let normalized = normalize_line_endings(input);
        assert_eq!(normalized, "Line 1\nLine 2\nLine 3\n");
    }

    #[test]
    fn normalize_line_endings_unix() {
        let input = "Line 1\nLine 2\nLine 3\n";
        let normalized = normalize_line_endings(input);
        assert_eq!(normalized, "Line 1\nLine 2\nLine 3\n");
    }

    #[test]
    fn normalize_line_endings_cr_before_crlf() {
        // A lone CR directly followed by a CRLF pair yields two line feeds.
        let input = "A\r\r\nB";
        let normalized = normalize_line_endings(input);
        assert_eq!(normalized, "A\n\nB");
    }

    #[test]
    fn normalize_whitespace_basic() {
        // U+00A0 (no-break space) and U+2000 (en quad) both become ' '.
        let input = "Hello\u{00A0}World\u{2000}Test";
        let normalized = normalize_whitespace(input, false);
        assert_eq!(normalized, "Hello World Test");
    }

    #[test]
    fn normalize_whitespace_preserve_structure() {
        let input = "Hello\tWorld\nNext Line";
        let normalized = normalize_whitespace(input, false);
        assert_eq!(normalized, "Hello\tWorld\nNext Line");
    }

    #[test]
    fn normalize_whitespace_collapse() {
        let input = "Hello   World    Test";
        let normalized = normalize_whitespace(input, true);
        assert_eq!(normalized, "Hello World Test");
    }

    #[test]
    fn normalize_whitespace_no_collapse() {
        let input = "Hello   World    Test";
        let normalized = normalize_whitespace(input, false);
        assert_eq!(normalized, "Hello   World    Test");
    }

    #[test]
    fn normalize_whitespace_collapse_mixed_unicode() {
        // Converted Unicode spaces take part in collapsing like ASCII spaces.
        let input = "Hello\u{00A0} \u{2000}World";
        let normalized = normalize_whitespace(input, true);
        assert_eq!(normalized, "Hello World");
    }

    #[test]
    fn remove_control_chars_basic() {
        let input = "Hello\x00World\x1FTest";
        let cleaned = remove_control_chars(input);
        assert_eq!(cleaned, "HelloWorldTest");
    }

    #[test]
    fn remove_control_chars_preserve_essential() {
        let input = "Hello\tWorld\nNext\rLine";
        let cleaned = remove_control_chars(input);
        assert_eq!(cleaned, "Hello\tWorld\nNext\rLine");
    }

    #[test]
    fn trim_lines_basic() {
        let input = "  Line 1  \n\t Line 2 \t\n   Line 3   ";
        let trimmed = trim_lines(input);
        assert_eq!(trimmed, "Line 1\nLine 2\nLine 3");
    }

    #[test]
    fn trim_lines_empty_lines() {
        let input = "Line 1\n   \nLine 3";
        let trimmed = trim_lines(input);
        assert_eq!(trimmed, "Line 1\n\nLine 3");
    }

    #[test]
    fn collapse_consecutive_spaces_basic() {
        let input = "Hello   World    Test";
        let collapsed = collapse_consecutive_spaces(input);
        assert_eq!(collapsed, "Hello World Test");
    }

    #[test]
    fn collapse_consecutive_spaces_preserve_other() {
        let input = "Hello\t\tWorld\n\nTest";
        let collapsed = collapse_consecutive_spaces(input);
        assert_eq!(collapsed, "Hello\t\tWorld\n\nTest");
    }

    #[test]
    fn normalization_chain() {
        // Exercises all three stages: mixed line endings, padded lines and an
        // inner run of spaces that only the collapse step can remove.
        let input = "  Line   1  \r\n\t  Line 2  \t\r  Line 3  ";
        let normalized = normalize_line_endings(input);
        let trimmed = trim_lines(&normalized);
        let final_result = normalize_whitespace(&trimmed, true);
        assert_eq!(final_result, "Line 1\nLine 2\nLine 3");
    }
}
256}