simple_fs/reshape/
normalizer.rs

1//! Path normalization functions
2//!
3//! Normalize path strings by collapsing redundant separators and handling platform-specific quirks.
4
5use camino::{Utf8Path, Utf8PathBuf};
6
7/// Checks if a path needs normalization.
8/// - If it contains a `\`
9/// - If it has two or more consecutive `//`
10/// - If it contains one or more `/./`
11///
12/// Note: This performs a single pass and returns as early as possible.
13pub fn needs_normalize(path: &Utf8Path) -> bool {
14	let path_str = path.as_str();
15	let mut chars = path_str.chars().peekable();
16
17	// Check for \\?\ prefix
18	if path_str.starts_with(r"\\?\") {
19		return true;
20	}
21
22	while let Some(c) = chars.next() {
23		match c {
24			'\\' => return true,
25			'/' => match chars.peek() {
26				Some('/') => return true,
27				Some('.') => {
28					let mut lookahead = chars.clone();
29					lookahead.next(); // consume '.'
30					match lookahead.peek() {
31						Some('/') | None => return true,
32						_ => {}
33					}
34				}
35				_ => {}
36			},
37			_ => {}
38		}
39	}
40
41	false
42}
43/// Normalizes a path by:
44/// - Converting backslashes to forward slashes
45/// - Collapsing multiple consecutive slashes to single slashes
46/// - Removing single dots except at the start
47/// - Removing Windows-specific `\\?\` prefix
48///
49/// The function performs a quick check to determine if normalization is actually needed.
50/// If no normalization is required, it returns the original path to avoid unnecessary allocations.
51pub fn into_normalized(path: Utf8PathBuf) -> Utf8PathBuf {
52	// Quick check to see if any normalization is needed
53	let path_str = path.as_str();
54
55	// Check for conditions that require normalization
56	let needs_normalization = needs_normalize(&path);
57
58	if !needs_normalization {
59		return path;
60	}
61
62	// Perform normalization
63	let mut result = String::with_capacity(path_str.len());
64	let mut chars = path_str.chars().peekable();
65	let mut last_was_slash = false;
66
67	// Handle Windows UNC path prefix (\\?\)
68	if path_str.starts_with(r"\\?\") {
69		for _ in 0..4 {
70			chars.next(); // Skip the first 4 chars
71		}
72	}
73
74	while let Some(c) = chars.next() {
75		match c {
76			'\\' | '/' => {
77				// Convert backslash to forward slash and collapse consecutive slashes
78				if !last_was_slash {
79					result.push('/');
80					last_was_slash = true;
81				}
82			}
83			'.' => {
84				// Special handling for dots
85				if last_was_slash {
86					// Look ahead to check if this is a "/./" pattern
87					match chars.peek() {
88						Some(&'/') | Some(&'\\') => {
89							// Skip single dot if it's not at the start
90							if !result.is_empty() {
91								chars.next(); // Skip the next slash
92								continue;
93							}
94						}
95						// Check if it's a "../" pattern (which we want to keep)
96						Some(&'.') => {
97							result.push('.');
98							last_was_slash = false;
99						}
100						// Something else
101						_ => {
102							result.push('.');
103							last_was_slash = false;
104						}
105					}
106				} else {
107					result.push('.');
108					last_was_slash = false;
109				}
110			}
111			_ => {
112				result.push(c);
113				last_was_slash = false;
114			}
115		}
116	}
117
118	// If the original path ended with a slash, ensure the normalized path does too
119	if (path_str.ends_with('/') || path_str.ends_with('\\')) && !result.ends_with('/') {
120		result.push('/');
121	}
122
123	Utf8PathBuf::from(result)
124}
125
126// region:    --- Tests
127
#[cfg(test)]
mod tests {
	type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>; // For tests.

	use super::*;

	/// Feeds every `(input, expected)` pair through `into_normalized` and asserts the
	/// resulting string, using `describe(input)` as the failure message.
	fn assert_all_normalized(cases: &[(&str, &str)], describe: impl Fn(&str) -> String) {
		for &(input, expected) in cases {
			let normalized = into_normalized(Utf8PathBuf::from(input));
			assert_eq!(normalized.as_str(), expected, "{}", describe(input));
		}
	}

	/// Backslashes, alone or mixed with forward slashes, become forward slashes.
	#[test]
	fn test_normalizer_into_normalize_backslashes() -> Result<()> {
		assert_all_normalized(
			&[
				(r"C:\Users\name\file.txt", "C:/Users/name/file.txt"),
				(r"path\to\file.txt", "path/to/file.txt"),
				(r"mixed/path\style", "mixed/path/style"),
			],
			|input| format!("Failed to normalize backslashes in '{input}'"),
		);

		Ok(())
	}

	/// Runs of separators collapse into a single forward slash.
	#[test]
	fn test_normalizer_into_normalize_multiple_slashes() -> Result<()> {
		assert_all_normalized(
			&[
				("//path//to///file.txt", "/path/to/file.txt"),
				("path////file.txt", "path/file.txt"),
				(r"\\server\\share\\file.txt", "/server/share/file.txt"),
			],
			|input| format!("Failed to collapse multiple slashes in '{input}'"),
		);

		Ok(())
	}

	/// Inner `/./` segments are removed while a leading `./` is kept.
	#[test]
	fn test_normalizer_into_normalize_single_dots() -> Result<()> {
		assert_all_normalized(
			&[
				("path/./file.txt", "path/file.txt"),
				("./path/./to/./file.txt", "./path/to/file.txt"),
				("path/to/./././file.txt", "path/to/file.txt"),
			],
			|input| format!("Failed to handle single dots correctly in '{input}'"),
		);

		Ok(())
	}

	/// `..` components are never resolved or removed.
	#[test]
	fn test_normalizer_into_normalize_preserve_parent_dirs() -> Result<()> {
		assert_all_normalized(
			&[
				("path/../file.txt", "path/../file.txt"),
				("../path/file.txt", "../path/file.txt"),
				("path/../../file.txt", "path/../../file.txt"),
			],
			|input| format!("Should preserve parent directory references in '{input}'"),
		);

		Ok(())
	}

	/// The Windows `\\?\` prefix is stripped.
	#[test]
	fn test_normalizer_into_normalize_windows_prefix() -> Result<()> {
		assert_all_normalized(
			&[
				(r"\\?\C:\Users\name\file.txt", "C:/Users/name/file.txt"),
				(r"\\?\UNC\server\share", "UNC/server/share"),
			],
			|input| format!("Failed to remove Windows prefix in '{input}'"),
		);

		Ok(())
	}

	/// Already-clean paths come back equal to the input.
	#[test]
	fn test_normalizer_into_normalize_no_change_needed() -> Result<()> {
		let already_clean = ["path/to/file.txt", "/absolute/path/file.txt", "../parent/dir", "file.txt"];

		for input in already_clean {
			let original = Utf8PathBuf::from(input);
			// This should be a simple identity return with no changes
			let normalized = into_normalized(original.clone());
			assert_eq!(
				normalized, original,
				"Path should not change when normalization not needed"
			);
		}

		Ok(())
	}

	/// A trailing separator on the input survives as a single trailing `/`.
	#[test]
	fn test_normalizer_into_normalize_trailing_slash() -> Result<()> {
		assert_all_normalized(
			&[
				("path/to/dir/", "path/to/dir/"),
				(r"path\to\dir\", "path/to/dir/"),
				("path//to///dir///", "path/to/dir/"),
			],
			|input| format!("Should preserve trailing slash in '{input}'"),
		);

		Ok(())
	}

	/// Several normalization rules applied to the same path at once.
	#[test]
	fn test_normalizer_into_normalize_complex_paths() -> Result<()> {
		assert_all_normalized(
			&[
				(
					r"C:\Users\.\name\..\admin\//docs\file.txt",
					"C:/Users/name/../admin/docs/file.txt",
				),
				(
					r"\\?\C:\Program Files\\.\multiple//slashes",
					"C:/Program Files/multiple/slashes",
				),
				("./current/dir/./file.txt", "./current/dir/file.txt"),
			],
			|input| format!("Failed to normalize complex path '{input}'"),
		);

		Ok(())
	}
}
320
321// endregion: --- Tests